Object Relational Mapper Leak Vulnerability in Filtering Task in Label Studio
Description
Label Studio is an open source data labeling tool. In all current versions of Label Studio prior to 1.9.2post0, the application allows users to insecurely set filters for filtering tasks. An attacker can construct a filter chain to filter tasks based on sensitive fields for all user accounts on the platform by exploiting Django's Object Relational Mapper (ORM). Since the results of query can be manipulated by the ORM filter, an attacker can leak these sensitive fields character by character. In addition, Label Studio had a hard coded secret key that an attacker can use to forge a session token of any user by exploiting this ORM Leak vulnerability to leak account password hashes. This vulnerability has been addressed in commit f931d9d129 which is included in the 1.9.2post0 release. Users are advised to upgrade. There are no known workarounds for this vulnerability.
Affected packages
Versions sourced from the GitHub Security Advisory.
| Package | Affected versions | Patched versions |
|---|---|---|
label-studioPyPI | < 1.9.2.post0 | 1.9.2.post0 |
Affected products
1- Range: < 1.9.2post0
Patches
1f931d9d12900fix: LEAP-237: Patch ORM Leak vulnerability in open source (#5012)
5 files changed · +351 −10
label_studio/core/settings/base.py+7 −1 modified@@ -15,7 +15,7 @@ import re from datetime import timedelta -from label_studio.core.utils.params import get_bool_env +from label_studio.core.utils.params import get_bool_env, get_env_list formatter = 'standard' JSON_LOG = get_bool_env('JSON_LOG', False) @@ -667,3 +667,9 @@ def collect_versions_dummy(**kwargs): REAL_HOSTNAME = os.getenv('HOSTNAME') # we have to use getenv, because we don't use LABEL_STUDIO_ prefix GCS_CLOUD_STORAGE_FORCE_DEFAULT_CREDENTIALS = get_bool_env('GCS_CLOUD_STORAGE_FORCE_DEFAULT_CREDENTIALS', False) PUBLIC_API_DOCS = get_bool_env('PUBLIC_API_DOCS', False) + +# By default, we disallow filters with foreign keys in data manager for security reasons. +# Add to this list (either here in code, or via the env) to allow specific filters that rely on foreign keys. +DATA_MANAGER_FILTER_ALLOWLIST = list( + set(get_env_list('DATA_MANAGER_FILTER_ALLOWLIST') + ['updated_by__active_organization']) +)
label_studio/core/utils/params.py+15 −3 modified@@ -1,4 +1,5 @@ import os +from typing import Callable, Optional, Sequence, TypeVar from rest_framework.exceptions import ValidationError @@ -125,16 +126,27 @@ def get_bool_env(key, default): return get_env(key, default, is_bool=True) -def get_env_list_int(key, default=None): +T = TypeVar('T') + + +def get_env_list( + key: str, default: Optional[Sequence[T]] = None, value_transform: Callable[[str], T] = str +) -> Sequence[T]: """ - "1,2,3" in env variable => [1, 2, 3] in python + "foo,bar,baz" in env variable => ["foo", "bar", "baz"] in python. + Use value_transform to convert the strings to any other type. """ value = get_env(key) if not value: if default is None: return [] return default - return [int(el) for el in value.split(',')] + + return [value_transform(el) for el in value.split(',')] + + +def get_env_list_int(key, default=None) -> Sequence[int]: + return get_env_list(key, default=default, value_transform=int) def get_all_env_with_prefix(prefix=None, is_bool=True, default_value=None):
label_studio/data_manager/functions.py+32 −5 modified@@ -2,6 +2,7 @@ """ import logging from collections import OrderedDict +from typing import Tuple from urllib.parse import unquote import ujson as json @@ -337,11 +338,37 @@ def preprocess_filter(_filter, *_): return _filter -def preprocess_field_name(raw_field_name, only_undefined_field=False): - field_name = raw_field_name.replace('filter:', '') - field_name = field_name.replace('tasks:', '') - ascending = False if field_name[0] == '-' else True # detect direction - field_name = field_name[1:] if field_name[0] == '-' else field_name # remove direction +def preprocess_field_name(raw_field_name, only_undefined_field=False) -> Tuple[str, bool]: + """Transform a field name (as specified in the datamanager views endpoint) to + a django ORM field name. Also handle dotted accesses to task.data. + + Edit with care; it's critical that this function not be changed in ways that + introduce vulnerabilities in the vein of the ORM Leak (see #5012). In particular + it is not advisable to use `replace` or other calls that replace all instances + of a string within this function. + + Returns: Django ORM field name: str, Sort is ascending: bool + """ + + field_name = raw_field_name + ascending = True + + # Descending marker `-` may come at the beginning of the string + if field_name.startswith('-'): + ascending = False + field_name = field_name[1:] + + # For security reasons, these must only be removed when they fall at the beginning of the string (or after `-`). + optional_prefixes = ['filter:', 'tasks:'] + for prefix in optional_prefixes: + if field_name.startswith(prefix): + field_name = field_name[len(prefix) :] + + # Descending marker may also come after other prefixes. Double negative is not allowed. + if ascending and field_name.startswith('-'): + ascending = False + field_name = field_name[1:] + if field_name.startswith('data.'): if only_undefined_field: field_name = f'data__{settings.DATA_UNDEFINED_NAME}'
label_studio/data_manager/serializers.py+44 −0 modified@@ -4,6 +4,7 @@ import ujson as json from data_manager.models import Filter, FilterGroup, View +from django.conf import settings from django.db import transaction from projects.models import Project from rest_framework import serializers @@ -18,6 +19,49 @@ class Meta: model = Filter fields = '__all__' + def validate_column(self, column: str) -> str: + """ + Ensure that the passed filter expression starts with 'filter:tasks:' and contains + no foreign key traversals. This means either the filter expression contains no '__' + substrings, or that it's the task.data json field that's accessed. + + Users depending on foreign key traversals in views can allowlist them via the + DATA_MANAGER_FILTER_ALLOWLIST setting in the env. + + Edit with care. The validations below are critical for security. + """ + + column_copy = column + + # We may support 'filter:annotations:' in the future, but we don't as of yet. + required_prefix = 'filter:tasks:' + optional_prefix = '-' + + if not column_copy.startswith(required_prefix): + raise serializers.ValidationError(f'Filter "{column}" should start with "{required_prefix}"') + + column_copy = column_copy[len(required_prefix) :] + + if column_copy.startswith(optional_prefix): + column_copy = column_copy[len(optional_prefix) :] + + if column_copy.startswith('data.'): + # Allow underscores if the filter is based on the `task.data` JSONField, because these don't leverage foreign keys. + return column + + # Specific filters relying on foreign keys can be allowlisted + if column_copy in settings.DATA_MANAGER_FILTER_ALLOWLIST: + return column + + # But in general, we don't allow foreign keys + if '__' in column_copy: + raise serializers.ValidationError( + f'"__" is not generally allowed in filters. Consider asking your administrator to add "{column_copy}" ' + 'to DATA_MANAGER_FILTER_ALLOWLIST, but note that some filter expressions may pose a security risk' + ) + + return column + class FilterGroupSerializer(serializers.ModelSerializer): filters = FilterSerializer(many=True)
label_studio/tests/data_manager/api_tasks.tavern.yml+253 −1 modified@@ -1124,7 +1124,7 @@ marks: - usefixtures: - django_live_url stages: - + - id: signup type: ref @@ -1479,3 +1479,255 @@ stages: "total_predictions": 0, "total": 1, } + +--- + +# See FilterSerializer; test logic preventing exploit that traverses the ORM +# to leak sensitive data character-by-character +test_name: tasks_api_filter_security_restrictions +strict: false +marks: +- usefixtures: + - django_live_url +stages: + +- id: signup + type: ref + +- id: create_project + name: create_project + request: + data: + title: Test Draft 1 + show_collab_predictions: true + method: POST + url: '{django_live_url}/api/projects' + response: + save: + json: + project_pk: id + created_by: created_by.id + status_code: 201 + +- name: create view with exploit filter should fail + request: + method: POST + url: '{django_live_url}/api/dm/views' + json: + project: '{project_pk}' + data: + filters: + conjunction: and + items: + - filter: "filter:tasks:updated_by__sensitive_field" + operator: empty + value: "true" + type: String + response: + status_code: 400 + json: + validation_errors: + filter_group: + filters: + - column: + - '"__" is not generally allowed in filters. Consider asking your administrator to add "updated_by__sensitive_field" to DATA_MANAGER_FILTER_ALLOWLIST, but note that some filter expressions may pose a security risk' + +- name: create view with allowlisted filter should succeed + request: + method: POST + url: '{django_live_url}/api/dm/views' + json: + project: '{project_pk}' + data: + filters: + conjunction: and + items: + - filter: "filter:tasks:updated_by__active_organization" + operator: empty + value: "true" + type: String + response: + status_code: 201 + save: + json: + view_pk: id + +- name: change filter to typical column succeeds + request: + method: PUT + url: '{django_live_url}/api/dm/views/{view_pk}?interaction=filter&project={project_pk}' + json: + project: '{project_pk}' + data: + filters: + conjunction: and + items: + - filter: "filter:tasks:updated_by" + operator: empty + value: "true" + type: String + response: + status_code: 200 + +- name: change filter to unexpected prefix fails + request: + method: PUT + url: '{django_live_url}/api/dm/views/{view_pk}?interaction=filter&project={project_pk}' + json: + project: '{project_pk}' + data: + filters: + conjunction: and + items: + - filter: "tasks:filter:annotations_results" + operator: empty + value: "true" + type: String + response: + status_code: 400 + json: + validation_errors: + filter_group: + filters: + - column: + - 'Filter "tasks:filter:annotations_results" should start with "filter:tasks:"' + +- name: change filter to include direction marker succeeds + request: + method: PUT + url: '{django_live_url}/api/dm/views/{view_pk}?interaction=filter&project={project_pk}' + json: + project: '{project_pk}' + data: + filters: + conjunction: and + items: + - filter: "filter:tasks:-annotations_results" + operator: empty + value: "true" + type: String + response: + status_code: 200 + +- name: invalid filter with direction marker fails, but suggests correct allowlist + request: + method: PUT + url: '{django_live_url}/api/dm/views/{view_pk}?interaction=filter&project={project_pk}' + json: + project: '{project_pk}' + data: + filters: + conjunction: and + items: + - filter: "filter:tasks:-updated_by__sensitive_field" + operator: empty + value: "true" + type: String + response: + status_code: 400 + json: + validation_errors: + filter_group: + filters: + - column: + - '"__" is not generally allowed in filters. Consider asking your administrator to add "updated_by__sensitive_field" to DATA_MANAGER_FILTER_ALLOWLIST, but note that some filter expressions may pose a security risk' + +- name: change filter to dotted data field with underscores succeeds + request: + method: PUT + url: '{django_live_url}/api/dm/views/{view_pk}?interaction=filter&project={project_pk}' + json: + project: '{project_pk}' + data: + filters: + conjunction: and + items: + - filter: "filter:tasks:data.images__0" + operator: empty + value: "true" + type: String + response: + status_code: 200 + +- name: change filter to valid column without prefix fails + request: + method: PUT + url: '{django_live_url}/api/dm/views/{view_pk}?interaction=filter&project={project_pk}' + json: + project: '{project_pk}' + data: + filters: + conjunction: and + items: + - filter: "annotations_results" + operator: empty + value: "true" + type: String + response: + status_code: 400 + json: + validation_errors: + filter_group: + filters: + - column: + - 'Filter "annotations_results" should start with "filter:tasks:"' + +--- + +# Regression test preventing replace-all of prefixes from being used to construct exploit filters +test_name: tasks_api_prevent_prefix_regression +strict: false +marks: +- usefixtures: + - django_live_url +stages: + +- id: signup + type: ref + +- id: create_project + name: create_project + request: + data: + title: Test Draft 1 + show_collab_predictions: true + method: POST + url: '{django_live_url}/api/projects' + response: + save: + json: + project_pk: id + created_by: created_by.id + status_code: 201 + +# Perhaps our validation should be further improved, but for now, it's +# garbage in, garbage out. +- name: create view with prefix in middle of filter should succeed + request: + method: POST + url: '{django_live_url}/api/dm/views' + json: + project: '{project_pk}' + data: + filters: + conjunction: and + items: + # in an earlier version, preprocess_field_name would have + # transformed this to updated_by__sensitive_field + - filter: "filter:tasks:updated_by_tasks:_sensitive_field" + operator: empty + value: "true" + type: String + response: + status_code: 201 + save: + json: + view_pk: id + +- name: get_tasks fails because "updated_by_tasks:_sensitive_field" doesn't resolve to a field + request: + method: GET + url: '{django_live_url}/api/tasks?view={view_pk}' + response: + # 500 actually expected here + status_code: 500
Vulnerability mechanics
Generated by null/stub on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.
References
5- github.com/advisories/GHSA-6hjj-gq77-j4qwghsaADVISORY
- nvd.nist.gov/vuln/detail/CVE-2023-47117ghsaADVISORY
- github.com/HumanSignal/label-studio/commit/f931d9d129002f54a495995774ce7384174cef5cghsax_refsource_MISCWEB
- github.com/HumanSignal/label-studio/security/advisories/GHSA-6hjj-gq77-j4qwghsax_refsource_CONFIRMWEB
- github.com/pypa/advisory-database/tree/main/vulns/label-studio/PYSEC-2023-275.yamlghsaWEB
News mentions
0No linked articles in our index yet.