VYPR
High severity · NVD Advisory · Published Nov 5, 2021 · Updated Aug 4, 2024

Use after free in `CollectiveReduceV2`

CVE-2021-41220

Description

TensorFlow is an open source platform for machine learning. In affected versions the async implementation of CollectiveReduceV2 suffers from a memory leak and a use after free. This occurs due to the asynchronous computation and the fact that objects that have been std::move()d from are still accessed. The fix will be included in TensorFlow 2.7.0. We will also cherry-pick this commit on TensorFlow 2.6.1, as this version is the only one that is also affected.

Affected packages

Versions sourced from the GitHub Security Advisory.

Package | Affected versions | Patched versions
tensorflow (PyPI) | >= 2.6.0, < 2.6.1 | 2.6.1
tensorflow-cpu (PyPI) | >= 2.6.0, < 2.6.1 | 2.6.1
tensorflow-gpu (PyPI) | >= 2.6.0, < 2.6.1 | 2.6.1

Affected products

1

Patches

1
ca38dab9d3ee

Fix undefined behavior in CollectiveReduceV2 and others

https://github.com/tensorflow/tensorflow · Ran Chen · Oct 4, 2021 · via ghsa
2 files changed · +77 -11
  • tensorflow/core/kernels/collective_ops.cc · +14 -11 · modified
    @@ -494,15 +494,17 @@ class CollectiveOpV2Kernel : public AsyncOpKernel {
                                   const Tensor& group_size, const Tensor& group_key,
                                   const Tensor& instance_key) {
         if (group_size.dims() > 0) {
    -      return errors::Internal("Unexpected dimensions on input group_size, got ",
    -                              group_size.shape().DebugString());
    +      return errors::InvalidArgument(
    +          "Unexpected dimensions on input group_size, got ",
    +          group_size.shape().DebugString());
         }
         if (group_key.dims() > 0) {
    -      return errors::Internal("Unexpected dimensions on input group_key, got ",
    -                              group_key.shape().DebugString());
    +      return errors::InvalidArgument(
    +          "Unexpected dimensions on input group_key, got ",
    +          group_key.shape().DebugString());
         }
         if (instance_key.dims() > 0) {
    -      return errors::Internal(
    +      return errors::InvalidArgument(
               "Unexpected dimensions on input instance_key, got ",
               instance_key.shape().DebugString());
         }
    @@ -625,7 +627,7 @@ class CollectiveReduceV2OpKernel : public CollectiveOpV2Kernel {
                                                   /*group_size*/ c->input(1),
                                                   /*group_key*/ c->input(2),
                                                   /*instance_key*/ c->input(3)),
    -                         done);
    +                         done_with_cleanup);
         col_params->instance.shape = c->input(0).shape();
         col_params->merge_op = merge_op_.get();
         col_params->final_op = final_op_.get();
    @@ -855,14 +857,15 @@ class CollectiveInitializeCommunicatorOpKernel : public AsyncOpKernel {
     
       Status CheckInputs(Tensor group_size_t, Tensor group_key_t) {
         if (group_size_t.dims() > 0) {
    -      return errors::Internal(
    +      return errors::InvalidArgument(
               "Unexpected dimensions on input group_size. "
               "It shoulbe a scalar, got tensor with shape ",
               group_size_t.shape().DebugString());
         }
         if (group_key_t.dims() > 0) {
    -      return errors::Internal("Unexpected dimensions on input group_key, got ",
    -                              group_key_t.shape().DebugString());
    +      return errors::InvalidArgument(
    +          "Unexpected dimensions on input group_key, got ",
    +          group_key_t.shape().DebugString());
         }
     
         auto group_size = group_size_t.unaligned_flat<int32>()(0);
    @@ -1084,7 +1087,7 @@ class CollectiveReduceV3OpKernel : public CollectiveOpV3Kernel {
         };
         core::RefCountPtr<CollectiveGroupResource> resource;
         OP_REQUIRES_OK_ASYNC(c, LookupResource(c, HandleFromInput(c, 1), &resource),
    -                         done);
    +                         done_with_cleanup);
     
         Tensor group_assignment = c->input(2);
     
    @@ -1134,7 +1137,7 @@ class CollectiveAllToAllV3OpKernel : public CollectiveOpV3Kernel {
         };
         core::RefCountPtr<CollectiveGroupResource> resource;
         OP_REQUIRES_OK_ASYNC(c, LookupResource(c, HandleFromInput(c, 1), &resource),
    -                         done);
    +                         done_with_cleanup);
     
         Tensor group_assignment = c->input(2);
     
    
  • tensorflow/python/kernel_tests/collective_ops_test.py · +63 -0 · modified
    @@ -1182,6 +1182,69 @@ def f():
         self.assertAllEqual(self.evaluate(f()), [[3.], [3.]])
     
     
    +@combinations.generate(
    +    combinations.times(
    +        combinations.combine(collective_op=[
    +            combinations.NamedObject('all_reduce_v2',
    +                                     CollectiveOpsV2.all_reduce),
    +            combinations.NamedObject('all_gather_v2',
    +                                     CollectiveOpsV2.all_gather)
    +        ]), device_combination))
    +class InvalidInputTest(test.TestCase, parameterized.TestCase):
    +
    +  def setUp(self):
    +    _setup_context()
    +    super().setUp()
    +
    +  def testInvalidGroupKey(self, collective_op, device, communication):
    +    dev0 = '/device:%s:0' % device
    +    group_size = 2
    +    group_key = [100]
    +    instance_key = 100
    +    in_tensor = constant_op.constant([1.])
    +
    +    with self.assertRaises(errors.InvalidArgumentError):
    +      with ops.device(dev0):
    +        collective_op(
    +            in_tensor,
    +            group_size,
    +            group_key,
    +            instance_key,
    +            communication_hint=communication)
    +
    +  def testInvalidGroupSize(self, collective_op, device, communication):
    +    dev0 = '/device:%s:0' % device
    +    group_size = -2
    +    group_key = 100
    +    instance_key = 100
    +    in_tensor = constant_op.constant([1.])
    +
    +    with self.assertRaises(errors.InvalidArgumentError):
    +      with ops.device(dev0):
    +        collective_op(
    +            in_tensor,
    +            group_size,
    +            group_key,
    +            instance_key,
    +            communication_hint=communication)
    +
    +  def testInvalidInstanceKey(self, collective_op, device, communication):
    +    dev0 = '/device:%s:0' % device
    +    group_size = 2
    +    group_key = 100
    +    instance_key = [100]
    +    in_tensor = constant_op.constant([1.])
    +
    +    with self.assertRaises(errors.InvalidArgumentError):
    +      with ops.device(dev0):
    +        collective_op(
    +            in_tensor,
    +            group_size,
    +            group_key,
    +            instance_key,
    +            communication_hint=communication)
    +
    +
     class CollectiveOpsV3Test(test.TestCase, parameterized.TestCase):
     
       def setUp(self):
    

Vulnerability mechanics

Generated by null/stub on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.

References

7

News mentions

0

No linked articles in our index yet.