Incomplete validation in boosted trees code
Description
TensorFlow is an open source platform for machine learning. In affected versions the code for boosted trees in TensorFlow is still missing validation. As a result, attackers can trigger denial of service (via dereferencing nullptrs or via CHECK-failures) as well as abuse undefined behavior (binding references to nullptrs). An attacker can also read and write from heap buffers, depending on the API that gets used and the arguments that are passed to the call. Given that the boosted trees implementation in TensorFlow is unmaintained, it is recommended to no longer use these APIs. We will deprecate TensorFlow's boosted trees APIs in subsequent releases. The fix will be included in TensorFlow 2.7.0. We will also cherrypick this commit on TensorFlow 2.6.1, TensorFlow 2.5.2, and TensorFlow 2.4.4, as these are also affected and still in supported range.
Affected packages
Versions sourced from the GitHub Security Advisory.
| Package | Affected versions | Patched versions |
|---|---|---|
| tensorflow (PyPI) | >= 2.6.0, < 2.6.1 | 2.6.1 |
| tensorflow (PyPI) | >= 2.5.0, < 2.5.2 | 2.5.2 |
| tensorflow (PyPI) | < 2.4.4 | 2.4.4 |
| tensorflow-cpu (PyPI) | >= 2.6.0, < 2.6.1 | 2.6.1 |
| tensorflow-cpu (PyPI) | >= 2.5.0, < 2.5.2 | 2.5.2 |
| tensorflow-cpu (PyPI) | < 2.4.4 | 2.4.4 |
| tensorflow-gpu (PyPI) | >= 2.6.0, < 2.6.1 | 2.6.1 |
| tensorflow-gpu (PyPI) | >= 2.5.0, < 2.5.2 | 2.5.2 |
| tensorflow-gpu (PyPI) | < 2.4.4 | 2.4.4 |
Affected products
1. Range: >= 2.6.0, < 2.6.1
Patches
1. Commit 5c8c9a8bfe75 — "Fixing security fixes in boosted trees ops"
2 files changed · +244 −6
tensorflow/core/kernels/boosted_trees/stats_ops.cc+49 −6 modified@@ -72,7 +72,10 @@ class BoostedTreesCalculateBestGainsPerFeatureOp : public OpKernel { &stats_summary_list)); const int64_t num_buckets = stats_summary_list[0].dim_size(1); // Check for single logit: 1 gradient + 1 hessian value. - DCHECK_EQ(stats_summary_list[0].dim_size(2), 2); + OP_REQUIRES(context, stats_summary_list[0].dim_size(2) == 2, + errors::InvalidArgument("stats_summary_list[0] must have " + "exactly 2 dimensions, obtained: ", + stats_summary_list[0].dim_size(2))); std::vector<TTypes<float, 3>::ConstTensor> stats_summary; stats_summary.reserve(stats_summary_list.size()); for (const auto& tensor : stats_summary_list) { @@ -275,8 +278,13 @@ class BoostedTreesCalculateBestFeatureSplitOp : public OpKernel { const int32_t num_buckets = stats_summary_t->dim_size(2) - 1; const int32_t logits_dim = logits_dim_; const int32_t hessian_dim = stats_summary_t->dim_size(3) - logits_dim; - DCHECK_GT(hessian_dim, 0); - DCHECK_LE(hessian_dim, logits_dim * logits_dim); + OP_REQUIRES(context, hessian_dim > 0, + errors::InvalidArgument("hessian dim should be < 0, got ", + hessian_dim)); + OP_REQUIRES(context, hessian_dim <= logits_dim * logits_dim, + errors::InvalidArgument( + "hessian dim should be <= ", logits_dim * logits_dim, + " but got: ", hessian_dim)); const Tensor* l1_t; OP_REQUIRES_OK(context, context->input("l1", &l1_t)); @@ -624,8 +632,13 @@ class BoostedTreesCalculateBestFeatureSplitV2 : public OpKernel { const int32_t logits_dim = logits_dim_; const int32_t hessian_dim = stats_summaries_list[0].dim_size(3) - logits_dim; - DCHECK_GT(hessian_dim, 0); - DCHECK_LE(hessian_dim, logits_dim * logits_dim); + OP_REQUIRES(context, hessian_dim > 0, + errors::InvalidArgument("hessian dim should be < 0, got ", + hessian_dim)); + OP_REQUIRES(context, hessian_dim <= logits_dim * logits_dim, + errors::InvalidArgument( + "hessian dim should be <= ", logits_dim * logits_dim, + " but got: ", hessian_dim)); // 
Vector of stats_summaries; each element is stats for feature of shape // [max_splits, feature_dim, num_buckets, logits_dim + hessian_dim]. @@ -1002,6 +1015,10 @@ class BoostedTreesSparseCalculateBestFeatureSplitOp : public OpKernel { const Tensor* node_id_range_t; OP_REQUIRES_OK(context, context->input("node_id_range", &node_id_range_t)); const auto node_id_range = node_id_range_t->vec<int32>(); + OP_REQUIRES( + context, node_id_range.size() == 2, + errors::InvalidArgument("node_id_range should have 2 entries, got: ", + node_id_range.size())); const int32_t node_id_first = node_id_range(0); // inclusive const int32_t node_id_last = node_id_range(1); // exclusive @@ -1075,6 +1092,11 @@ class BoostedTreesSparseCalculateBestFeatureSplitOp : public OpKernel { "dims, the last value in stats_summary_shape, which was ", stats_dims, ". At index (", idx, ", 4), stats_summary_indices contains value ", stat_dim)); + OP_REQUIRES(context, stat_dim >= 0, + errors::InvalidArgument( + "Stat dim, the sum of logits dim and hessian dim in " + "stats_summary_indices, should be >= 0, which was ", + stat_dim, " at index ", idx)); std::pair<FeatureMapIterator, bool> const& f_insert_result = f_map.insert( FeatureMapIterator::value_type(feature_dim, BucketMap())); auto& b_map = f_insert_result.first->second; @@ -1307,6 +1329,12 @@ class BoostedTreesMakeStatsSummaryOp : public OpKernel { const Tensor* gradients_t; OP_REQUIRES_OK(context, context->input("gradients", &gradients_t)); const auto gradients = gradients_t->matrix<float>(); + OP_REQUIRES( + context, node_ids.size() == gradients.dimension(0), + errors::InvalidArgument( + "node_ids size should match 0th dim of gradients. 
node ids " + "size: ", + node_ids.size(), ", gradients dim0: ", gradients.dimension(0))); // hessians const Tensor* hessians_t; OP_REQUIRES_OK(context, context->input("hessians", &hessians_t)); @@ -1376,6 +1404,13 @@ class BoostedTreesAggregateStatsOp : public OpKernel { OP_REQUIRES_OK(context, context->input("gradients", &gradients_t)); const auto gradients = gradients_t->matrix<float>(); + OP_REQUIRES( + context, node_ids.size() == gradients.dimension(0), + errors::InvalidArgument( + "node_ids size should match 0th dim of gradients. node ids " + "size: ", + node_ids.size(), ", gradients dim0: ", gradients.dimension(0))); + // hessians. const Tensor* hessians_t; OP_REQUIRES_OK(context, context->input("hessians", &hessians_t)); @@ -1406,6 +1441,9 @@ class BoostedTreesAggregateStatsOp : public OpKernel { for (int i = 0; i < batch_size; ++i) { const int32_t node = node_ids(i); + OP_REQUIRES(context, node >= 0, + errors::InvalidArgument( + "node_ids ", i, "th entry should be >=0, got: ", node)); for (int feature_dim = 0; feature_dim < feature_dims; ++feature_dim) { const int32_t feature_value = feature(i, feature_dim); const int32_t bucket = @@ -1612,7 +1650,12 @@ class BoostedTreesSparseAggregateStatsOp : public OpKernel { const int64_t stats_dims = logits_dims + hessians_dims; const int64_t num_sparse_entries = feature_indices_t->dim_size(0); const int32_t feature_dims = feature_shape(1); - DCHECK_LE(num_sparse_entries, batch_size * feature_dims); + OP_REQUIRES(context, num_sparse_entries <= batch_size * feature_dims, + errors::InvalidArgument( + "feature_indices dim0 should be <= gradients dim0 * " + "feature_shape[1]. features_indices dim0: ", + num_sparse_entries, " gradients dim0: ", batch_size, + ", feature_shape[1]: ", feature_dims)); // Aggregate statistics info to map. StatsPartitionMap stats_map;
tensorflow/python/kernel_tests/boosted_trees/stats_ops_test.py+195 −0 modified@@ -17,9 +17,11 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import boosted_trees_ops +from tensorflow.python.ops import gen_boosted_trees_ops from tensorflow.python.ops import sparse_ops from tensorflow.python.platform import googletest @@ -1665,6 +1667,199 @@ def testMakeStatsSummaryNumericalPrecisionMegaBatch(self): """Tests numeric precision.""" self._verify_precision(length=50000000) + def testBoostedTreesCalculateBestGainsPerFeatureSecurity(self): + node_id_range = [1, 2] + stats_summary_list = [[[[]]]] + l1 = [1.0] + l2 = [1.0] + tree_complexity = [1.0] + min_node_weight = [1.17] + max_splits = 1 + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_calculate_best_gains_per_feature( + node_id_range=node_id_range, + stats_summary_list=stats_summary_list, + l1=l1, + l2=l2, + tree_complexity=tree_complexity, + min_node_weight=min_node_weight, + max_splits=max_splits) + + def testBoostedTreesCalculateBestFeatureSplitSecurity(self): + node_id_range = [1, 2] + stats_summary = [[[[]]]] + split_type = 'equality' + l1 = [1.0] + l2 = [1.0] + tree_complexity = [1.0] + min_node_weight = [1.17] + logits_dimension = 5 + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_calculate_best_feature_split( + node_id_range=node_id_range, + stats_summary=stats_summary, + l1=l1, + l2=l2, + tree_complexity=tree_complexity, + min_node_weight=min_node_weight, + logits_dimension=logits_dimension, + split_type=split_type) + + def testBoostedTreesCalculateBestFeatureSplitSecurity2(self): + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + 
gen_boosted_trees_ops.boosted_trees_calculate_best_feature_split( + node_id_range=[0, 8], + stats_summary=[[[[1.0], [2.0], [3.0]]]], + l1=[0.5], + l2=[0.5], + tree_complexity=[0.1], + min_node_weight=[1.0], + logits_dimension=8) + + def testBoostedTreesCalculateBestFeatureSplitV2Security(self): + node_id_range = [1, 2] + stats_summaries_list = [[[[[]]]]] + split_types = ['inequality'] + candidate_feature_ids = [1, 2, 3, 4] + l1 = [1.0] + l2 = [1.0] + tree_complexity = [1.0] + min_node_weight = [1.17] + logits_dimension = 5 + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_calculate_best_feature_split_v2( + node_id_range=node_id_range, + stats_summaries_list=stats_summaries_list, + split_types=split_types, + candidate_feature_ids=candidate_feature_ids, + l1=l1, + l2=l2, + tree_complexity=tree_complexity, + min_node_weight=min_node_weight, + logits_dimension=logits_dimension) + + def testBoostedTreesSparseCalculateBestFeatureSplitSecurity(self): + node_id_range = [] + stats_summary_indices = [[]] + stats_summary_values = [1.0] + stats_summary_shape = [1, 1, 1, 1] + l1 = [1.0] + l2 = [1.0] + tree_complexity = [0.5] + min_node_weight = [1.0] + logits_dimension = 3 + split_type = 'inequality' + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_sparse_calculate_best_feature_split( + node_id_range=node_id_range, + stats_summary_indices=stats_summary_indices, + stats_summary_values=stats_summary_values, + stats_summary_shape=stats_summary_shape, + l1=l1, + l2=l2, + tree_complexity=tree_complexity, + min_node_weight=min_node_weight, + logits_dimension=logits_dimension, + split_type=split_type) + + def testBoostedTreesSparseCalculateBestFeatureSplitSecurity2(self): + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_sparse_calculate_best_feature_split( + node_id_range=[0, 1], + stats_summary_indices=[[0, -1, 
-1, -1], [1, 0, -1, 0], [1, 0, 0, -1]], + stats_summary_values=[0.1, 0.2, 0.3], + stats_summary_shape=[1, 1, 1, 1], + l1=[0.5], + l2=[0.5], + tree_complexity=[0.1], + min_node_weight=[1.0], + logits_dimension=1) + + def testBoostedTreesMakeStatsSummarySecurity(self): + node_ids = [1, 2] + gradients = [[]] + hessians = [[0.2], [0.1]] + bucketized_features_list = [[1], [2]] + max_splits = 3 + num_buckets = 3 + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_make_stats_summary( + node_ids=node_ids, + gradients=gradients, + hessians=hessians, + bucketized_features_list=bucketized_features_list, + max_splits=max_splits, + num_buckets=num_buckets) + + def testBoostedTreesMakeStatsSummarySecurity2(self): + node_ids = [1, 2, 3] + gradients = [[0.1], [0.2]] + hessians = [[0.2], [0.1]] + bucketized_features_list = [[1], [2]] + max_splits = 3 + num_buckets = 3 + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_make_stats_summary( + node_ids=node_ids, + gradients=gradients, + hessians=hessians, + bucketized_features_list=bucketized_features_list, + max_splits=max_splits, + num_buckets=num_buckets) + + def testBoostedTreesAggregateStatsSecurity(self): + node_ids = [1, 2] + gradients = [[]] + hessians = [[100.0]] + feature = [[0, 0, 0]] + max_splits = 100 + num_buckets = 100 + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_aggregate_stats( + node_ids=node_ids, + gradients=gradients, + hessians=hessians, + feature=feature, + max_splits=max_splits, + num_buckets=num_buckets) + + def testBoostedTreesAggregateStatsSecurity2(self): + node_ids = [-10] + gradients = [[0.0, 0.0]] + hessians = [[100.0]] + feature = [[0, 0, 0]] + max_splits = 100 + num_buckets = 100 + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + self.evaluate( + gen_boosted_trees_ops.boosted_trees_aggregate_stats( + 
node_ids=node_ids, + gradients=gradients, + hessians=hessians, + feature=feature, + max_splits=max_splits, + num_buckets=num_buckets)) + + def testBoostedTreesSparseAggregateStatsSecurity(self): + node_ids = [] + gradients = [[1.0]] + hessians = [[100.0]] + feature_indices = [[0, 0, 0]] + feature_values = [0, 0, 0] + feature_shape = [0, 0, 0] + max_splits = 100 + num_buckets = 100 + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_sparse_aggregate_stats( + node_ids=node_ids, + gradients=gradients, + hessians=hessians, + feature_indices=feature_indices, + feature_values=feature_values, + feature_shape=feature_shape, + max_splits=max_splits, + num_buckets=num_buckets) + class BestMultiDimFeatureSplitMultiClassV2Op(StatsOpsTest): """Tests multi-class/multi-regression for best splits using V2 op."""
Vulnerability mechanics
Generated by null/stub on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.
References
- github.com/advisories/GHSA-57wx-m983-2f88 (GHSA advisory)
- nvd.nist.gov/vuln/detail/CVE-2021-41208 (NVD advisory)
- github.com/pypa/advisory-database/tree/main/vulns/tensorflow-cpu/PYSEC-2021-617.yaml (PyPA advisory database)
- github.com/pypa/advisory-database/tree/main/vulns/tensorflow-gpu/PYSEC-2021-815.yaml (PyPA advisory database)
- github.com/pypa/advisory-database/tree/main/vulns/tensorflow/PYSEC-2021-400.yaml (PyPA advisory database)
- github.com/tensorflow/tensorflow/commit/5c8c9a8bfe750f9743d0c859bae112060b216f5c (fix commit)
- github.com/tensorflow/tensorflow/security/advisories/GHSA-57wx-m983-2f88 (vendor advisory)
News mentions
No linked articles in our index yet.