VYPR
Moderate severity · NVD Advisory · Published May 30, 2025 · Updated May 30, 2025

vLLM has a Regular Expression Denial of Service (ReDoS, Exponential Complexity) Vulnerability in `pythonic_tool_parser.py`

CVE-2025-48887

Description

vLLM, an inference and serving engine for large language models (LLMs), has a Regular Expression Denial of Service (ReDoS) vulnerability in the file vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py of versions 0.6.4 up to but excluding 0.9.0. The root cause is the use of a highly complex and nested regular expression for tool call detection, which can be exploited by an attacker to cause severe performance degradation or make the service unavailable. The pattern contains multiple nested quantifiers, optional groups, and inner repetitions which make it vulnerable to catastrophic backtracking. Version 0.9.0 contains a patch for the issue.

Affected packages

Versions sourced from the GitHub Security Advisory.

Package | Affected versions | Patched versions
vllm (PyPI) | >= 0.6.4, < 0.9.0 | 0.9.0

Affected products

1

Patches

1
4fc1bf813ad8

[Bugfix] Migrate to REGEX Library to prevent catastrophic backtracking (#18454)

https://github.com/vllm-project/vllm · Feng XiaoLong · May 23, 2025 · via ghsa
52 files changed · +62 −58
  • benchmarks/benchmark_serving_structured_output.py+2 2 modified
    @@ -672,7 +672,7 @@ def process_one_metric(
     def evaluate(ret, args):
         def _eval_correctness_json(expected, actual):
             # extract json string from string using regex
    -        import re
    +        import regex as re
     
             actual = actual.replace("\n", "").replace(" ", "").strip()
             try:
    @@ -687,7 +687,7 @@ def _eval_correctness_choice(expected, actual):
             return actual in args.choice
     
         def _eval_correctness_regex(expected, actual):
    -        import re
    +        import regex as re
     
             return re.match(args.regex, actual) is not None
     
    
  • benchmarks/kernels/graph_machete_bench.py+1 1 modified
    @@ -2,11 +2,11 @@
     
     import math
     import pickle
    -import re
     from collections import defaultdict
     
     import matplotlib.pyplot as plt
     import pandas as pd
    +import regex as re
     import seaborn as sns
     from torch.utils.benchmark import Measurement as TMeasurement
     
    
  • examples/offline_inference/prithvi_geospatial_mae.py+1 1 modified
    @@ -20,12 +20,12 @@
     import argparse
     import datetime
     import os
    -import re
     from typing import Union
     
     import albumentations
     import numpy as np
     import rasterio
    +import regex as re
     import torch
     from einops import rearrange
     from terratorch.datamodules import Sen1Floods11NonGeoDataModule
    
  • .github/scripts/cleanup_pr_body.sh+1 1 modified
    @@ -26,7 +26,7 @@ sed -i '/\*\*BEFORE SUBMITTING, PLEASE READ.*\*\*/,$d' "${NEW}"
     
     # Remove HTML <details> section that includes <summary> text of "PR Checklist (Click to Expand)"
     python3 - <<EOF
    -import re
    +import regex as re
     
     with open("${NEW}", "r") as file:
         content = file.read()
    
  • pyproject.toml+1 0 modified
    @@ -8,6 +8,7 @@ requires = [
         "setuptools-scm>=8.0",
         "torch == 2.7.0",
         "wheel",
    +    "regex",
         "jinja2",
     ]
     build-backend = "setuptools.build_meta"
    
  • requirements/build.txt+1 0 modified
    @@ -7,3 +7,4 @@ setuptools-scm>=8
     torch==2.7.0
     wheel
     jinja2>=3.1.6
    +regex
    
  • requirements/common.txt+1 0 modified
    @@ -1,3 +1,4 @@
    +regex # Replace re for higher-performance regex matching
     cachetools
     psutil
     sentencepiece  # Required for LLaMA tokenizer.
    
  • requirements/nightly_torch_test.txt+1 1 modified
    @@ -38,4 +38,4 @@ matplotlib # required for qwen-vl test
     # required for  Multi-Modal Models Test (Standard)
     num2words # required for smolvlm test
     pqdm
    -timm # required for internvl test
    +timm # required for internvl test
    \ No newline at end of file
    
  • setup.py+1 2 modified
    @@ -5,12 +5,12 @@
     import json
     import logging
     import os
    -import re
     import subprocess
     import sys
     from pathlib import Path
     from shutil import which
     
    +import regex as re
     import torch
     from packaging.version import Version, parse
     from setuptools import Extension, setup
    @@ -389,7 +389,6 @@ def run(self) -> None:
                 # vllm_flash_attn python code:
                 # Regex from
                 #  `glob.translate('vllm/vllm_flash_attn/**/*.py', recursive=True)`
    -            import re
                 compiled_regex = re.compile(
                     r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py")
                 file_members += list(
    
  • tests/entrypoints/llm/test_guided_generate.py+1 1 modified
    @@ -1,12 +1,12 @@
     # SPDX-License-Identifier: Apache-2.0
     
     import json
    -import re
     import weakref
     from enum import Enum
     
     import jsonschema
     import pytest
    +import regex as re
     from pydantic import BaseModel
     
     from vllm.distributed import cleanup_dist_env_and_memory
    
  • tests/entrypoints/openai/test_chat.py+1 1 modified
    @@ -2,13 +2,13 @@
     
     # imports for guided decoding tests
     import json
    -import re
     from typing import Optional
     
     import jsonschema
     import openai  # use the official client for correctness check
     import pytest
     import pytest_asyncio
    +import regex as re
     import requests
     import torch
     from openai import BadRequestError, OpenAI
    
  • tests/entrypoints/openai/test_completion.py+1 2 modified
    @@ -1,8 +1,6 @@
     # SPDX-License-Identifier: Apache-2.0
    -
     # imports for guided decoding tests
     import json
    -import re
     import shutil
     from tempfile import TemporaryDirectory
     from typing import Optional
    @@ -11,6 +9,7 @@
     import openai  # use the official client for correctness check
     import pytest
     import pytest_asyncio
    +import regex as re
     # downloading lora to test lora requests
     from huggingface_hub import snapshot_download
     from openai import BadRequestError
    
  • tests/entrypoints/openai/test_prompt_validation.py+6 6 modified
    @@ -1,10 +1,9 @@
     # SPDX-License-Identifier: Apache-2.0
     
     # imports for guided decoding tests
    -import re
    -
     import openai
     import pytest
    +import regex as re
     
     from ...utils import RemoteOpenAIServer
     
    @@ -32,7 +31,7 @@ async def test_out_of_vocab_token_ids():
             client = remote_server.get_async_client()
     
             with pytest.raises(openai.BadRequestError,
    -                           match=re.compile('.*out of vocabulary.*')):
    +                           match=re.compile('.*out of vocabulary.*').pattern):
                 await client.completions.create(model=model_name,
                                                 prompt=[999999],
                                                 max_tokens=5,
    @@ -46,9 +45,10 @@ async def test_reject_multistep_with_guided_decoding():
         with RemoteOpenAIServer(model_name, server_args) as remote_server:
             client = remote_server.get_async_client()
     
    -        with pytest.raises(openai.BadRequestError,
    -                           match=re.compile(
    -                               '.*Guided decoding .* multi-step decoding.*')):
    +        with pytest.raises(
    +                openai.BadRequestError,
    +                match=re.compile(
    +                    '.*Guided decoding .* multi-step decoding.*').pattern):
                 await client.completions.create(
                     model=model_name,
                     prompt="Hello",
    
  • tests/models/multimodal/generation/test_phi4mm.py+1 1 modified
    @@ -1,12 +1,12 @@
     # SPDX-License-Identifier: Apache-2.0
     
     import os
    -import re
     from collections.abc import Sequence
     from typing import Optional
     
     import librosa
     import pytest
    +import regex as re
     from huggingface_hub import snapshot_download
     from transformers import AutoTokenizer
     
    
  • tests/models/multimodal/generation/vlm_utils/model_utils.py+1 1 modified
    @@ -3,11 +3,11 @@
     for manipulating the input / output of HF & vLLM test runners, which are
     typically specific to a small subset of models.
     """
    -import re
     import types
     from pathlib import PosixPath
     from typing import Optional, Union
     
    +import regex as re
     import torch
     from PIL.Image import Image
     from transformers import (AutoConfig, AutoTokenizer, BatchFeature,
    
  • tests/tool_use/test_tool_choice_required.py+2 2 modified
    @@ -1,10 +1,10 @@
     # SPDX-License-Identifier: Apache-2.0
     import json
    -import re
     from copy import deepcopy
     from unittest.mock import MagicMock
     
     import pytest
    +import regex as re
     from pydantic import TypeAdapter
     
     from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
    @@ -333,4 +333,4 @@ def test_streaming_output_valid(output, empty_params, delta_len):
                 combined_messages += message.tool_calls[0].function.arguments
         combined_messages += "}]"
         assert json.loads(combined_messages) == output
    -    assert json.dumps(json.loads(combined_messages)) == output_json
    +    assert json.dumps(json.loads(combined_messages)) == output_json
    \ No newline at end of file
    
  • tests/v1/entrypoints/llm/test_struct_output_generate.py+1 1 modified
    @@ -4,12 +4,12 @@
     from __future__ import annotations
     
     import json
    -import re
     from enum import Enum
     from typing import TYPE_CHECKING, Any
     
     import jsonschema
     import pytest
    +import regex as re
     from pydantic import BaseModel
     
     from tests.reasoning.utils import run_reasoning_extraction
    
  • tests/v1/entrypoints/openai/test_completion.py+1 1 modified
    @@ -1,11 +1,11 @@
     # SPDX-License-Identifier: Apache-2.0
     
    -import re
     from typing import Optional
     
     import openai  # use the official client for correctness check
     import pytest
     import pytest_asyncio
    +import regex as re
     from openai import BadRequestError
     
     from tests.utils import RemoteOpenAIServer
    
  • tests/v1/sample/utils.py+2 1 modified
    @@ -1,9 +1,10 @@
     # SPDX-License-Identifier: Apache-2.0
     
    -import re
     from enum import Enum
     from typing import Optional
     
    +import regex as re
    +
     from vllm import CompletionOutput
     
     
    
  • vllm/collect_env.py+1 1 modified
    @@ -815,4 +815,4 @@ def main():
     
     
     if __name__ == '__main__':
    -    main()
    +    main()
    \ No newline at end of file
    
  • vllm/config.py+1 1 modified
    @@ -6,7 +6,6 @@
     import hashlib
     import inspect
     import json
    -import re
     import textwrap
     import uuid
     import warnings
    @@ -20,6 +19,7 @@
     from typing import (TYPE_CHECKING, Any, Callable, ClassVar, Literal, Optional,
                         Protocol, TypeVar, Union, cast, get_args, get_origin)
     
    +import regex as re
     import torch
     from torch.distributed import ProcessGroup, ReduceOp
     from transformers import PretrainedConfig
    
  • vllm/engine/arg_utils.py+1 1 modified
    @@ -4,7 +4,6 @@
     import argparse
     import dataclasses
     import json
    -import re
     import sys
     import threading
     import warnings
    @@ -13,6 +12,7 @@
     from typing import (Annotated, Any, Callable, Dict, List, Literal, Optional,
                         Type, TypeVar, Union, cast, get_args, get_origin)
     
    +import regex as re
     import torch
     from typing_extensions import TypeIs, deprecated
     
    
  • vllm/entrypoints/openai/api_server.py+1 1 modified
    @@ -7,7 +7,6 @@
     import inspect
     import multiprocessing
     import os
    -import re
     import signal
     import socket
     import tempfile
    @@ -21,6 +20,7 @@
     from typing import Annotated, Optional, Union
     
     import prometheus_client
    +import regex as re
     import uvloop
     from fastapi import APIRouter, Depends, FastAPI, Form, HTTPException, Request
     from fastapi.exceptions import RequestValidationError
    
  • vllm/entrypoints/openai/protocol.py+1 1 modified
    @@ -3,11 +3,11 @@
     # Adapted from
     # https://github.com/lm-sys/FastChat/blob/168ccc29d3f7edc50823016105c024fe2282732a/fastchat/protocol/openai_api_protocol.py
     import json
    -import re
     import time
     from http import HTTPStatus
     from typing import Annotated, Any, ClassVar, Literal, Optional, Union
     
    +import regex as re
     import torch
     from fastapi import HTTPException, UploadFile
     from pydantic import (BaseModel, ConfigDict, Field, TypeAdapter,
    
  • vllm/entrypoints/openai/serving_chat.py+1 1 modified
    @@ -2,14 +2,14 @@
     
     import asyncio
     import json
    -import re
     import time
     from collections.abc import AsyncGenerator, AsyncIterator
     from collections.abc import Sequence as GenericSequence
     from typing import Callable, Final, Optional, Union
     
     import jinja2
     import partial_json_parser
    +import regex as re
     from fastapi import Request
     from pydantic import TypeAdapter
     
    
  • vllm/entrypoints/openai/tool_parsers/deepseekv3_tool_parser.py+2 1 modified
    @@ -1,9 +1,10 @@
     # SPDX-License-Identifier: Apache-2.0
     
    -import re
     from collections.abc import Sequence
     from typing import Union
     
    +import regex as re
    +
     from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
                                                   DeltaFunctionCall, DeltaMessage,
                                                   DeltaToolCall,
    
  • vllm/entrypoints/openai/tool_parsers/granite_20b_fc_tool_parser.py+1 1 modified
    @@ -1,12 +1,12 @@
     # SPDX-License-Identifier: Apache-2.0
     
     import json
    -import re
     from collections.abc import Sequence
     from json import JSONDecoder
     from typing import Union
     
     import partial_json_parser
    +import regex as re
     from partial_json_parser.core.options import Allow
     
     from vllm.entrypoints.chat_utils import random_tool_call_id
    
  • vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py+1 1 modified
    @@ -1,11 +1,11 @@
     # SPDX-License-Identifier: Apache-2.0
     
     import json
    -import re
     from collections.abc import Sequence
     from typing import Union
     
     import partial_json_parser
    +import regex as re
     from partial_json_parser.core.options import Allow
     
     from vllm.entrypoints.chat_utils import random_tool_call_id
    
  • vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py+1 1 modified
    @@ -1,11 +1,11 @@
     # SPDX-License-Identifier: Apache-2.0
     
     import json
    -import re
     from collections.abc import Sequence
     from typing import Union
     
     import partial_json_parser
    +import regex as re
     from partial_json_parser.core.options import Allow
     
     from vllm.entrypoints.chat_utils import random_tool_call_id
    
  • vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py+1 1 modified
    @@ -1,12 +1,12 @@
     # SPDX-License-Identifier: Apache-2.0
     
     import json
    -import re
     from collections.abc import Sequence
     from json import JSONDecoder
     from typing import Union
     
     import partial_json_parser
    +import regex as re
     from partial_json_parser.core.options import Allow
     from transformers import PreTrainedTokenizerBase
     
    
  • vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py+1 1 modified
    @@ -1,13 +1,13 @@
     # SPDX-License-Identifier: Apache-2.0
     
     import json
    -import re
     from collections.abc import Sequence
     from random import choices
     from string import ascii_letters, digits
     from typing import Union
     
     import partial_json_parser
    +import regex as re
     from partial_json_parser.core.options import Allow
     from pydantic import Field
     
    
  • vllm/entrypoints/openai/tool_parsers/phi4mini_tool_parser.py+1 1 modified
    @@ -1,10 +1,10 @@
     # SPDX-License-Identifier: Apache-2.0
     
     import json
    -import re
     from collections.abc import Sequence
     from typing import Any, Optional
     
    +import regex as re
     from transformers import PreTrainedTokenizerBase
     
     from vllm.entrypoints.chat_utils import random_tool_call_id
    
  • vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py+1 1 modified
    @@ -2,10 +2,10 @@
     
     import ast
     import json
    -import re
     from collections.abc import Sequence
     from typing import Any, Union
     
    +import regex as re
     from transformers import PreTrainedTokenizerBase
     
     from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
    
  • vllm/lora/models.py+1 1 modified
    @@ -3,11 +3,11 @@
     import copy
     import math
     import os
    -import re
     from collections.abc import Sequence
     from dataclasses import dataclass, field
     from typing import Any, Callable, Optional, Union
     
    +import regex as re
     import safetensors.torch
     import torch
     from torch import nn
    
  • vllm/lora/utils.py+1 1 modified
    @@ -1,10 +1,10 @@
     # SPDX-License-Identifier: Apache-2.0
     
     import os
    -import re
     from typing import Optional, Union
     
     import huggingface_hub
    +import regex as re
     from huggingface_hub.utils import (EntryNotFoundError, HfHubHTTPError,
                                        HFValidationError, RepositoryNotFoundError)
     from torch import nn
    
  • vllm/model_executor/guided_decoding/utils.py+1 1 modified
    @@ -1,6 +1,6 @@
     # SPDX-License-Identifier: Apache-2.0
     
    -import re
    +import regex as re
     
     
     def has_xgrammar_unsupported_json_features(schema: dict) -> bool:
    
  • vllm/model_executor/guided_decoding/xgrammar_decoding.py+1 1 modified
    @@ -4,10 +4,10 @@
     from __future__ import annotations
     
     import json
    -import re
     from dataclasses import dataclass, field
     from typing import TYPE_CHECKING, Any
     
    +import regex as re
     import torch
     
     import vllm.envs
    
  • vllm/model_executor/layers/quantization/compressed_tensors/utils.py+1 1 modified
    @@ -1,10 +1,10 @@
     # SPDX-License-Identifier: Apache-2.0
     
    -import re
     from collections.abc import Iterable, Mapping
     from types import MappingProxyType
     from typing import Optional
     
    +import regex as re
     from compressed_tensors import CompressionFormat
     from torch.nn import Module
     
    
  • vllm/model_executor/layers/quantization/modelopt.py+1 1 modified
    @@ -228,7 +228,7 @@ def from_config(cls, config: dict[str, Any]) -> "ModelOptNvFp4Config":
                        exclude_modules, group_size)
     
         def is_layer_excluded(self, prefix: str, exclude_modules: list):
    -        import re
    +        import regex as re
             for pattern in exclude_modules:
                 regex_str = pattern.replace('.', r'\.').replace('*', r'.*')
                 if re.fullmatch(regex_str, prefix):
    
  • vllm/model_executor/layers/quantization/quark/utils.py+2 1 modified
    @@ -1,10 +1,11 @@
     # SPDX-License-Identifier: Apache-2.0
     
    -import re
     from collections.abc import Iterable, Mapping
     from types import MappingProxyType
     from typing import Any, Optional
     
    +import regex as re
    +
     
     def deep_compare(dict1: Any, dict2: Any) -> bool:
         if type(dict1) is not type(dict2):
    
  • vllm/model_executor/layers/quantization/utils/gptq_utils.py+1 1 modified
    @@ -1,8 +1,8 @@
     # SPDX-License-Identifier: Apache-2.0
    -import re
     from copy import deepcopy
     from typing import Optional, Union
     
    +import regex as re
     import torch
     
     from vllm.config import QuantizationConfig
    
  • vllm/model_executor/model_loader/tensorizer.py+1 1 modified
    @@ -7,14 +7,14 @@
     import io
     import json
     import os
    -import re
     import threading
     import time
     from collections.abc import Generator
     from dataclasses import dataclass
     from functools import partial
     from typing import Any, BinaryIO, Optional, Union
     
    +import regex as re
     import torch
     from torch import nn
     from torch.utils._python_dispatch import TorchDispatchMode
    
  • vllm/model_executor/models/mimo_mtp.py+1 1 modified
    @@ -250,7 +250,7 @@ def load_weights(self, weights: Iterable[tuple[str,
             return loaded_params
     
         def map_model_name_to_mtp_param_name(self, name: str) -> str:
    -        import re
    +        import regex as re
             name_without_prefix = [
                 "token_layernorm", "hidden_layernorm", "input_proj",
                 "final_layernorm"
    
  • vllm/model_executor/models/minimax_text_01.py+1 1 modified
    @@ -2,10 +2,10 @@
     """Inference-only MiniMaxText01 model."""
     import copy
     import math
    -import re
     from collections.abc import Iterable
     from typing import Optional, Union
     
    +import regex as re
     import torch
     import torch.distributed
     import torch.nn.functional as F
    
  • vllm/model_executor/models/phi3v.py+1 1 modified
    @@ -14,10 +14,10 @@
     # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     # See the License for the specific language governing permissions and
     # limitations under the License.
    -import re
     from collections.abc import Iterable, Mapping, Sequence
     from typing import Any, Literal, Optional, TypedDict, Union
     
    +import regex as re
     import torch
     import torch.nn as nn
     from transformers import (BatchFeature, CLIPVisionConfig, PretrainedConfig,
    
  • vllm/model_executor/models/qwen_vl.py+1 1 modified
    @@ -7,12 +7,12 @@
     
     import copy
     import math
    -import re
     import unicodedata
     from collections.abc import Collection, Mapping, Sequence, Set
     from functools import lru_cache, partial
     from typing import Callable, Literal, Optional, TypedDict, Union
     
    +import regex as re
     import torch
     from torch import nn
     from torchvision import transforms
    
  • vllm/model_executor/models/transformers.py+1 1 modified
    @@ -14,11 +14,11 @@
     # See the License for the specific language governing permissions and
     # limitations under the License.
     """Wrapper around `transformers` models"""
    -import re
     from collections.abc import Iterable
     from contextlib import nullcontext
     from typing import Literal, Optional, Union
     
    +import regex as re
     import torch
     from torch import nn
     from transformers import AutoModel, PretrainedConfig, PreTrainedModel
    
  • vllm/multimodal/processing.py+1 1 modified
    @@ -1,6 +1,5 @@
     # SPDX-License-Identifier: Apache-2.0
     import json
    -import re
     import sys
     from abc import ABC, abstractmethod
     from collections import defaultdict
    @@ -12,6 +11,7 @@
     from typing import (TYPE_CHECKING, Generic, NamedTuple, Optional, Protocol,
                         TypeVar, Union, cast)
     
    +import regex as re
     import torch
     from typing_extensions import assert_never
     
    
  • vllm/reasoning/granite_reasoning_parser.py+1 1 modified
    @@ -1,9 +1,9 @@
     # SPDX-License-Identifier: Apache-2.0
     
    -import re
     from collections.abc import Sequence
     from typing import Optional, Union
     
    +import regex as re
     from transformers import PreTrainedTokenizerBase
     
     from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
    
  • vllm/transformers_utils/tokenizers/mistral.py+1 1 modified
    @@ -1,12 +1,12 @@
     # SPDX-License-Identifier: Apache-2.0
     
     import os
    -import re
     from dataclasses import dataclass
     from pathlib import Path
     from typing import TYPE_CHECKING, Any, Optional, Union, cast
     
     import huggingface_hub
    +import regex as re
     from huggingface_hub import HfApi, hf_hub_download
     
     from vllm.logger import init_logger
    
  • vllm/utils.py+1 1 modified
    @@ -19,7 +19,6 @@
     import multiprocessing
     import os
     import pickle
    -import re
     import signal
     import socket
     import subprocess
    @@ -54,6 +53,7 @@
     import numpy as np
     import numpy.typing as npt
     import psutil
    +import regex as re
     import torch
     import torch.types
     import yaml
    
  • vllm/v1/structured_output/utils.py+1 1 modified
    @@ -2,7 +2,7 @@
     
     from __future__ import annotations
     
    -import re
    +import regex as re
     
     
     def grammar_is_likely_lark(grammar_str: str) -> bool:
    

Vulnerability mechanics

Generated by null/stub on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.

References

6

News mentions

0

No linked articles in our index yet.