vLLM has a Regular Expression Denial of Service (ReDoS, Exponential Complexity) Vulnerability in `pythonic_tool_parser.py`
Description
vLLM, an inference and serving engine for large language models (LLMs), has a Regular Expression Denial of Service (ReDoS) vulnerability in the file vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py of versions 0.6.4 up to but excluding 0.9.0. The root cause is the use of a highly complex and nested regular expression for tool call detection, which can be exploited by an attacker to cause severe performance degradation or make the service unavailable. The pattern contains multiple nested quantifiers, optional groups, and inner repetitions which make it vulnerable to catastrophic backtracking. Version 0.9.0 contains a patch for the issue.
Affected packages
Versions sourced from the GitHub Security Advisory.
| Package | Affected versions | Patched versions |
|---|---|---|
| vllm (PyPI) | >= 0.6.4, < 0.9.0 | 0.9.0 |
Affected products (1)
Patches (1)
4fc1bf813ad8 — [Bugfix] Migrate to REGEX Library to prevent catastrophic backtracking (#18454)
52 files changed · +62 −58
benchmarks/benchmark_serving_structured_output.py+2 −2 modified@@ -672,7 +672,7 @@ def process_one_metric( def evaluate(ret, args): def _eval_correctness_json(expected, actual): # extract json string from string using regex - import re + import regex as re actual = actual.replace("\n", "").replace(" ", "").strip() try: @@ -687,7 +687,7 @@ def _eval_correctness_choice(expected, actual): return actual in args.choice def _eval_correctness_regex(expected, actual): - import re + import regex as re return re.match(args.regex, actual) is not None
benchmarks/kernels/graph_machete_bench.py+1 −1 modified@@ -2,11 +2,11 @@ import math import pickle -import re from collections import defaultdict import matplotlib.pyplot as plt import pandas as pd +import regex as re import seaborn as sns from torch.utils.benchmark import Measurement as TMeasurement
examples/offline_inference/prithvi_geospatial_mae.py+1 −1 modified@@ -20,12 +20,12 @@ import argparse import datetime import os -import re from typing import Union import albumentations import numpy as np import rasterio +import regex as re import torch from einops import rearrange from terratorch.datamodules import Sen1Floods11NonGeoDataModule
.github/scripts/cleanup_pr_body.sh+1 −1 modified@@ -26,7 +26,7 @@ sed -i '/\*\*BEFORE SUBMITTING, PLEASE READ.*\*\*/,$d' "${NEW}" # Remove HTML <details> section that includes <summary> text of "PR Checklist (Click to Expand)" python3 - <<EOF -import re +import regex as re with open("${NEW}", "r") as file: content = file.read()
pyproject.toml+1 −0 modified@@ -8,6 +8,7 @@ requires = [ "setuptools-scm>=8.0", "torch == 2.7.0", "wheel", + "regex", "jinja2", ] build-backend = "setuptools.build_meta"
requirements/build.txt+1 −0 modified@@ -7,3 +7,4 @@ setuptools-scm>=8 torch==2.7.0 wheel jinja2>=3.1.6 +regex
requirements/common.txt+1 −0 modified@@ -1,3 +1,4 @@ +regex # Replace re for higher-performance regex matching cachetools psutil sentencepiece # Required for LLaMA tokenizer.
requirements/nightly_torch_test.txt+1 −1 modified@@ -38,4 +38,4 @@ matplotlib # required for qwen-vl test # required for Multi-Modal Models Test (Standard) num2words # required for smolvlm test pqdm -timm # required for internvl test +timm # required for internvl test \ No newline at end of file
setup.py+1 −2 modified@@ -5,12 +5,12 @@ import json import logging import os -import re import subprocess import sys from pathlib import Path from shutil import which +import regex as re import torch from packaging.version import Version, parse from setuptools import Extension, setup @@ -389,7 +389,6 @@ def run(self) -> None: # vllm_flash_attn python code: # Regex from # `glob.translate('vllm/vllm_flash_attn/**/*.py', recursive=True)` - import re compiled_regex = re.compile( r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py") file_members += list(
tests/entrypoints/llm/test_guided_generate.py+1 −1 modified@@ -1,12 +1,12 @@ # SPDX-License-Identifier: Apache-2.0 import json -import re import weakref from enum import Enum import jsonschema import pytest +import regex as re from pydantic import BaseModel from vllm.distributed import cleanup_dist_env_and_memory
tests/entrypoints/openai/test_chat.py+1 −1 modified@@ -2,13 +2,13 @@ # imports for guided decoding tests import json -import re from typing import Optional import jsonschema import openai # use the official client for correctness check import pytest import pytest_asyncio +import regex as re import requests import torch from openai import BadRequestError, OpenAI
tests/entrypoints/openai/test_completion.py+1 −2 modified@@ -1,8 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 - # imports for guided decoding tests import json -import re import shutil from tempfile import TemporaryDirectory from typing import Optional @@ -11,6 +9,7 @@ import openai # use the official client for correctness check import pytest import pytest_asyncio +import regex as re # downloading lora to test lora requests from huggingface_hub import snapshot_download from openai import BadRequestError
tests/entrypoints/openai/test_prompt_validation.py+6 −6 modified@@ -1,10 +1,9 @@ # SPDX-License-Identifier: Apache-2.0 # imports for guided decoding tests -import re - import openai import pytest +import regex as re from ...utils import RemoteOpenAIServer @@ -32,7 +31,7 @@ async def test_out_of_vocab_token_ids(): client = remote_server.get_async_client() with pytest.raises(openai.BadRequestError, - match=re.compile('.*out of vocabulary.*')): + match=re.compile('.*out of vocabulary.*').pattern): await client.completions.create(model=model_name, prompt=[999999], max_tokens=5, @@ -46,9 +45,10 @@ async def test_reject_multistep_with_guided_decoding(): with RemoteOpenAIServer(model_name, server_args) as remote_server: client = remote_server.get_async_client() - with pytest.raises(openai.BadRequestError, - match=re.compile( - '.*Guided decoding .* multi-step decoding.*')): + with pytest.raises( + openai.BadRequestError, + match=re.compile( + '.*Guided decoding .* multi-step decoding.*').pattern): await client.completions.create( model=model_name, prompt="Hello",
tests/models/multimodal/generation/test_phi4mm.py+1 −1 modified@@ -1,12 +1,12 @@ # SPDX-License-Identifier: Apache-2.0 import os -import re from collections.abc import Sequence from typing import Optional import librosa import pytest +import regex as re from huggingface_hub import snapshot_download from transformers import AutoTokenizer
tests/models/multimodal/generation/vlm_utils/model_utils.py+1 −1 modified@@ -3,11 +3,11 @@ for manipulating the input / output of HF & vLLM test runners, which are typically specific to a small subset of models. """ -import re import types from pathlib import PosixPath from typing import Optional, Union +import regex as re import torch from PIL.Image import Image from transformers import (AutoConfig, AutoTokenizer, BatchFeature,
tests/tool_use/test_tool_choice_required.py+2 −2 modified@@ -1,10 +1,10 @@ # SPDX-License-Identifier: Apache-2.0 import json -import re from copy import deepcopy from unittest.mock import MagicMock import pytest +import regex as re from pydantic import TypeAdapter from vllm.entrypoints.openai.protocol import (ChatCompletionRequest, @@ -333,4 +333,4 @@ def test_streaming_output_valid(output, empty_params, delta_len): combined_messages += message.tool_calls[0].function.arguments combined_messages += "}]" assert json.loads(combined_messages) == output - assert json.dumps(json.loads(combined_messages)) == output_json + assert json.dumps(json.loads(combined_messages)) == output_json \ No newline at end of file
tests/v1/entrypoints/llm/test_struct_output_generate.py+1 −1 modified@@ -4,12 +4,12 @@ from __future__ import annotations import json -import re from enum import Enum from typing import TYPE_CHECKING, Any import jsonschema import pytest +import regex as re from pydantic import BaseModel from tests.reasoning.utils import run_reasoning_extraction
tests/v1/entrypoints/openai/test_completion.py+1 −1 modified@@ -1,11 +1,11 @@ # SPDX-License-Identifier: Apache-2.0 -import re from typing import Optional import openai # use the official client for correctness check import pytest import pytest_asyncio +import regex as re from openai import BadRequestError from tests.utils import RemoteOpenAIServer
tests/v1/sample/utils.py+2 −1 modified@@ -1,9 +1,10 @@ # SPDX-License-Identifier: Apache-2.0 -import re from enum import Enum from typing import Optional +import regex as re + from vllm import CompletionOutput
vllm/collect_env.py+1 −1 modified@@ -815,4 +815,4 @@ def main(): if __name__ == '__main__': - main() + main() \ No newline at end of file
vllm/config.py+1 −1 modified@@ -6,7 +6,6 @@ import hashlib import inspect import json -import re import textwrap import uuid import warnings @@ -20,6 +19,7 @@ from typing import (TYPE_CHECKING, Any, Callable, ClassVar, Literal, Optional, Protocol, TypeVar, Union, cast, get_args, get_origin) +import regex as re import torch from torch.distributed import ProcessGroup, ReduceOp from transformers import PretrainedConfig
vllm/engine/arg_utils.py+1 −1 modified@@ -4,7 +4,6 @@ import argparse import dataclasses import json -import re import sys import threading import warnings @@ -13,6 +12,7 @@ from typing import (Annotated, Any, Callable, Dict, List, Literal, Optional, Type, TypeVar, Union, cast, get_args, get_origin) +import regex as re import torch from typing_extensions import TypeIs, deprecated
vllm/entrypoints/openai/api_server.py+1 −1 modified@@ -7,7 +7,6 @@ import inspect import multiprocessing import os -import re import signal import socket import tempfile @@ -21,6 +20,7 @@ from typing import Annotated, Optional, Union import prometheus_client +import regex as re import uvloop from fastapi import APIRouter, Depends, FastAPI, Form, HTTPException, Request from fastapi.exceptions import RequestValidationError
vllm/entrypoints/openai/protocol.py+1 −1 modified@@ -3,11 +3,11 @@ # Adapted from # https://github.com/lm-sys/FastChat/blob/168ccc29d3f7edc50823016105c024fe2282732a/fastchat/protocol/openai_api_protocol.py import json -import re import time from http import HTTPStatus from typing import Annotated, Any, ClassVar, Literal, Optional, Union +import regex as re import torch from fastapi import HTTPException, UploadFile from pydantic import (BaseModel, ConfigDict, Field, TypeAdapter,
vllm/entrypoints/openai/serving_chat.py+1 −1 modified@@ -2,14 +2,14 @@ import asyncio import json -import re import time from collections.abc import AsyncGenerator, AsyncIterator from collections.abc import Sequence as GenericSequence from typing import Callable, Final, Optional, Union import jinja2 import partial_json_parser +import regex as re from fastapi import Request from pydantic import TypeAdapter
vllm/entrypoints/openai/tool_parsers/deepseekv3_tool_parser.py+2 −1 modified@@ -1,9 +1,10 @@ # SPDX-License-Identifier: Apache-2.0 -import re from collections.abc import Sequence from typing import Union +import regex as re + from vllm.entrypoints.openai.protocol import (ChatCompletionRequest, DeltaFunctionCall, DeltaMessage, DeltaToolCall,
vllm/entrypoints/openai/tool_parsers/granite_20b_fc_tool_parser.py+1 −1 modified@@ -1,12 +1,12 @@ # SPDX-License-Identifier: Apache-2.0 import json -import re from collections.abc import Sequence from json import JSONDecoder from typing import Union import partial_json_parser +import regex as re from partial_json_parser.core.options import Allow from vllm.entrypoints.chat_utils import random_tool_call_id
vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py+1 −1 modified@@ -1,11 +1,11 @@ # SPDX-License-Identifier: Apache-2.0 import json -import re from collections.abc import Sequence from typing import Union import partial_json_parser +import regex as re from partial_json_parser.core.options import Allow from vllm.entrypoints.chat_utils import random_tool_call_id
vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py+1 −1 modified@@ -1,11 +1,11 @@ # SPDX-License-Identifier: Apache-2.0 import json -import re from collections.abc import Sequence from typing import Union import partial_json_parser +import regex as re from partial_json_parser.core.options import Allow from vllm.entrypoints.chat_utils import random_tool_call_id
vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py+1 −1 modified@@ -1,12 +1,12 @@ # SPDX-License-Identifier: Apache-2.0 import json -import re from collections.abc import Sequence from json import JSONDecoder from typing import Union import partial_json_parser +import regex as re from partial_json_parser.core.options import Allow from transformers import PreTrainedTokenizerBase
vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py+1 −1 modified@@ -1,13 +1,13 @@ # SPDX-License-Identifier: Apache-2.0 import json -import re from collections.abc import Sequence from random import choices from string import ascii_letters, digits from typing import Union import partial_json_parser +import regex as re from partial_json_parser.core.options import Allow from pydantic import Field
vllm/entrypoints/openai/tool_parsers/phi4mini_tool_parser.py+1 −1 modified@@ -1,10 +1,10 @@ # SPDX-License-Identifier: Apache-2.0 import json -import re from collections.abc import Sequence from typing import Any, Optional +import regex as re from transformers import PreTrainedTokenizerBase from vllm.entrypoints.chat_utils import random_tool_call_id
vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py+1 −1 modified@@ -2,10 +2,10 @@ import ast import json -import re from collections.abc import Sequence from typing import Any, Union +import regex as re from transformers import PreTrainedTokenizerBase from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
vllm/lora/models.py+1 −1 modified@@ -3,11 +3,11 @@ import copy import math import os -import re from collections.abc import Sequence from dataclasses import dataclass, field from typing import Any, Callable, Optional, Union +import regex as re import safetensors.torch import torch from torch import nn
vllm/lora/utils.py+1 −1 modified@@ -1,10 +1,10 @@ # SPDX-License-Identifier: Apache-2.0 import os -import re from typing import Optional, Union import huggingface_hub +import regex as re from huggingface_hub.utils import (EntryNotFoundError, HfHubHTTPError, HFValidationError, RepositoryNotFoundError) from torch import nn
vllm/model_executor/guided_decoding/utils.py+1 −1 modified@@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 -import re +import regex as re def has_xgrammar_unsupported_json_features(schema: dict) -> bool:
vllm/model_executor/guided_decoding/xgrammar_decoding.py+1 −1 modified@@ -4,10 +4,10 @@ from __future__ import annotations import json -import re from dataclasses import dataclass, field from typing import TYPE_CHECKING, Any +import regex as re import torch import vllm.envs
vllm/model_executor/layers/quantization/compressed_tensors/utils.py+1 −1 modified@@ -1,10 +1,10 @@ # SPDX-License-Identifier: Apache-2.0 -import re from collections.abc import Iterable, Mapping from types import MappingProxyType from typing import Optional +import regex as re from compressed_tensors import CompressionFormat from torch.nn import Module
vllm/model_executor/layers/quantization/modelopt.py+1 −1 modified@@ -228,7 +228,7 @@ def from_config(cls, config: dict[str, Any]) -> "ModelOptNvFp4Config": exclude_modules, group_size) def is_layer_excluded(self, prefix: str, exclude_modules: list): - import re + import regex as re for pattern in exclude_modules: regex_str = pattern.replace('.', r'\.').replace('*', r'.*') if re.fullmatch(regex_str, prefix):
vllm/model_executor/layers/quantization/quark/utils.py+2 −1 modified@@ -1,10 +1,11 @@ # SPDX-License-Identifier: Apache-2.0 -import re from collections.abc import Iterable, Mapping from types import MappingProxyType from typing import Any, Optional +import regex as re + def deep_compare(dict1: Any, dict2: Any) -> bool: if type(dict1) is not type(dict2):
vllm/model_executor/layers/quantization/utils/gptq_utils.py+1 −1 modified@@ -1,8 +1,8 @@ # SPDX-License-Identifier: Apache-2.0 -import re from copy import deepcopy from typing import Optional, Union +import regex as re import torch from vllm.config import QuantizationConfig
vllm/model_executor/model_loader/tensorizer.py+1 −1 modified@@ -7,14 +7,14 @@ import io import json import os -import re import threading import time from collections.abc import Generator from dataclasses import dataclass from functools import partial from typing import Any, BinaryIO, Optional, Union +import regex as re import torch from torch import nn from torch.utils._python_dispatch import TorchDispatchMode
vllm/model_executor/models/mimo_mtp.py+1 −1 modified@@ -250,7 +250,7 @@ def load_weights(self, weights: Iterable[tuple[str, return loaded_params def map_model_name_to_mtp_param_name(self, name: str) -> str: - import re + import regex as re name_without_prefix = [ "token_layernorm", "hidden_layernorm", "input_proj", "final_layernorm"
vllm/model_executor/models/minimax_text_01.py+1 −1 modified@@ -2,10 +2,10 @@ """Inference-only MiniMaxText01 model.""" import copy import math -import re from collections.abc import Iterable from typing import Optional, Union +import regex as re import torch import torch.distributed import torch.nn.functional as F
vllm/model_executor/models/phi3v.py+1 −1 modified@@ -14,10 +14,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import re from collections.abc import Iterable, Mapping, Sequence from typing import Any, Literal, Optional, TypedDict, Union +import regex as re import torch import torch.nn as nn from transformers import (BatchFeature, CLIPVisionConfig, PretrainedConfig,
vllm/model_executor/models/qwen_vl.py+1 −1 modified@@ -7,12 +7,12 @@ import copy import math -import re import unicodedata from collections.abc import Collection, Mapping, Sequence, Set from functools import lru_cache, partial from typing import Callable, Literal, Optional, TypedDict, Union +import regex as re import torch from torch import nn from torchvision import transforms
vllm/model_executor/models/transformers.py+1 −1 modified@@ -14,11 +14,11 @@ # See the License for the specific language governing permissions and # limitations under the License. """Wrapper around `transformers` models""" -import re from collections.abc import Iterable from contextlib import nullcontext from typing import Literal, Optional, Union +import regex as re import torch from torch import nn from transformers import AutoModel, PretrainedConfig, PreTrainedModel
vllm/multimodal/processing.py+1 −1 modified@@ -1,6 +1,5 @@ # SPDX-License-Identifier: Apache-2.0 import json -import re import sys from abc import ABC, abstractmethod from collections import defaultdict @@ -12,6 +11,7 @@ from typing import (TYPE_CHECKING, Generic, NamedTuple, Optional, Protocol, TypeVar, Union, cast) +import regex as re import torch from typing_extensions import assert_never
vllm/reasoning/granite_reasoning_parser.py+1 −1 modified@@ -1,9 +1,9 @@ # SPDX-License-Identifier: Apache-2.0 -import re from collections.abc import Sequence from typing import Optional, Union +import regex as re from transformers import PreTrainedTokenizerBase from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
vllm/transformers_utils/tokenizers/mistral.py+1 −1 modified@@ -1,12 +1,12 @@ # SPDX-License-Identifier: Apache-2.0 import os -import re from dataclasses import dataclass from pathlib import Path from typing import TYPE_CHECKING, Any, Optional, Union, cast import huggingface_hub +import regex as re from huggingface_hub import HfApi, hf_hub_download from vllm.logger import init_logger
vllm/utils.py+1 −1 modified@@ -19,7 +19,6 @@ import multiprocessing import os import pickle -import re import signal import socket import subprocess @@ -54,6 +53,7 @@ import numpy as np import numpy.typing as npt import psutil +import regex as re import torch import torch.types import yaml
vllm/v1/structured_output/utils.py+1 −1 modified@@ -2,7 +2,7 @@ from __future__ import annotations -import re +import regex as re def grammar_is_likely_lark(grammar_str: str) -> bool:
Vulnerability mechanics
Generated by null/stub on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.
References (6)
- github.com/advisories/GHSA-w6q7-j642-7c25 (ghsa; ADVISORY)
- nvd.nist.gov/vuln/detail/CVE-2025-48887 (ghsa; ADVISORY)
- github.com/pypa/advisory-database/tree/main/vulns/vllm/PYSEC-2025-50.yaml (ghsa; WEB)
- github.com/vllm-project/vllm/commit/4fc1bf813ad80172c1db31264beaef7d93fe0601 (ghsa; x_refsource_MISC; WEB)
- github.com/vllm-project/vllm/pull/18454 (ghsa; x_refsource_MISC; WEB)
- github.com/vllm-project/vllm/security/advisories/GHSA-w6q7-j642-7c25 (ghsa; x_refsource_CONFIRM; WEB)
News mentions (0)
No linked articles in our index yet.