High severity · NVD Advisory · Published Sep 25, 2020 · Updated Aug 4, 2024

Integer truncation in Shard API usage

CVE-2020-15202

Description

In TensorFlow before versions 1.15.4, 2.0.3, 2.1.2, 2.2.1, and 2.3.1, the `Shard` API expects its last argument to be a function taking two `int64` (i.e., `long long`) arguments. However, several places in TensorFlow pass a lambda taking `int` or `int32` arguments instead. In these cases, if the amount of work to be parallelized is large enough, integer truncation occurs. Depending on how the lambda's two arguments are used, this can result in segfaults, reads or writes outside heap-allocated arrays, stack overflows, or data corruption. The issue is patched in commits 27b417360cbd671ef55915e4bb6bb06af8b8a832 and ca8c013b5e97b1373b3bb1c97ea655e69f31a575, and the fix is included in TensorFlow versions 1.15.4, 2.0.3, 2.1.2, 2.2.1, and 2.3.1.
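
The failure mode can be shown in isolation with a minimal sketch. `ShardLike` below is a hypothetical stand-in (not TensorFlow's actual API) whose callback has the same two-`int64` shape as the `Shard` work function; a lambda declared with 32-bit parameters still converts to the 64-bit `std::function`, so the bounds are silently narrowed at the call.

    #include <cstdint>
    #include <cstdio>
    #include <functional>

    // Hypothetical stand-in with the same callback shape as the Shard API:
    // the work function receives 64-bit [start, limit) bounds.
    void ShardLike(int64_t total,
                   const std::function<void(int64_t, int64_t)>& work) {
      work(0, total);  // one "worker" is enough to demonstrate the narrowing
    }

    int main() {
      const int64_t total = (1LL << 32) + 7;  // more work units than fit in 32 bits

      // Bug pattern: the lambda declares 32-bit parameters, so both bounds
      // are implicitly truncated when the std::function invokes it.
      ShardLike(total, [](int32_t start, int32_t limit) {
        std::printf("start=%d limit=%d\n", start, limit);  // prints: start=0 limit=7
      });
      // With limit truncated to 7, nearly all work is skipped; depending on
      // how start/limit feed indexing arithmetic, truncation can instead
      // address memory outside the intended range.
      return 0;
    }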

Affected packages

Versions sourced from the GitHub Security Advisory.

Package          Ecosystem   Affected versions    Patched version
tensorflow       PyPI        < 1.15.4             1.15.4
tensorflow       PyPI        >= 2.0.0, < 2.0.3    2.0.3
tensorflow       PyPI        >= 2.1.0, < 2.1.2    2.1.2
tensorflow       PyPI        >= 2.2.0, < 2.2.1    2.2.1
tensorflow       PyPI        >= 2.3.0, < 2.3.1    2.3.1
tensorflow-cpu   PyPI        < 1.15.4             1.15.4
tensorflow-cpu   PyPI        >= 2.0.0, < 2.0.3    2.0.3
tensorflow-cpu   PyPI        >= 2.1.0, < 2.1.2    2.1.2
tensorflow-cpu   PyPI        >= 2.2.0, < 2.2.1    2.2.1
tensorflow-cpu   PyPI        >= 2.3.0, < 2.3.1    2.3.1
tensorflow-gpu   PyPI        < 1.15.4             1.15.4
tensorflow-gpu   PyPI        >= 2.0.0, < 2.0.3    2.0.3
tensorflow-gpu   PyPI        >= 2.1.0, < 2.1.2    2.1.2
tensorflow-gpu   PyPI        >= 2.2.0, < 2.2.1    2.2.1
tensorflow-gpu   PyPI        >= 2.3.0, < 2.3.1    2.3.1

Affected products

1 product

Patches

2 commits
ca8c013b5e97

Prevent integer truncation from 64 to 32 bits.

https://github.com/tensorflow/tensorflow · Mihai Maruseac · Sep 19, 2020 · via GHSA
9 files changed · +17 −15
  • tensorflow/core/kernels/boosted_trees/prediction_ops.cc · +3 −3 · modified
    @@ -121,7 +121,7 @@ class BoostedTreesTrainingPredictOp : public OpKernel {
           auto do_work = [&resource, &bucketized_features, &cached_tree_ids,
                           &cached_node_ids, &output_partial_logits,
                           &output_node_ids, latest_tree,
    -                      this](int32 start, int32 end) {
    +                      this](int64 start, int64 end) {
             for (int32 i = start; i < end; ++i) {
               int32 tree_id = cached_tree_ids(i);
               int32 node_id = cached_node_ids(i);
    @@ -237,7 +237,7 @@ class BoostedTreesPredictOp : public OpKernel {
     
         const int32 last_tree = resource->num_trees() - 1;
         auto do_work = [&resource, &bucketized_features, &output_logits, last_tree,
    -                    this](int32 start, int32 end) {
    +                    this](int64 start, int64 end) {
           for (int32 i = start; i < end; ++i) {
             std::vector<float> tree_logits(logits_dimension_, 0.0);
             int32 tree_id = 0;
    @@ -340,7 +340,7 @@ class BoostedTreesExampleDebugOutputsOp : public OpKernel {
         // path. Note: feature_ids has one less value than logits_path because the
         // first value of each logit path will be the bias.
         auto do_work = [&resource, &bucketized_features, &output_debug_info,
    -                    last_tree](int32 start, int32 end) {
    +                    last_tree](int64 start, int64 end) {
           for (int32 i = start; i < end; ++i) {
             // Proto to store debug outputs, per example.
             boosted_trees::DebugOutput example_debug_info;
    
  • tensorflow/core/kernels/image/crop_and_resize_op.cc · +2 −2 · modified
    @@ -223,7 +223,7 @@ struct CropAndResize<CPUDevice, T> {
         const int depth = crops.dimension(3);
     
         // Sharding across boxes.
    -    auto CropAndResizePerBox = [&](int start_box, int limit_box) {
    +    auto CropAndResizePerBox = [&](int64 start_box, int64 limit_box) {
           for (int b = start_box; b < limit_box; ++b) {
             const float y1 = boxes(b, 0);
             const float x1 = boxes(b, 1);
    @@ -449,7 +449,7 @@ struct CropAndResizeBackpropImage<CPUDevice, T> {
     
         grads_image.setZero();
     
    -    auto CropAndResizeBackImgPerBox = [&](int start_box, int limit_box) {
    +    auto CropAndResizeBackImgPerBox = [&](int64 start_box, int64 limit_box) {
           for (int b = start_box; b < limit_box; ++b) {
             const float y1 = boxes(b, 0);
             const float x1 = boxes(b, 1);
    
  • tensorflow/core/kernels/linalg/banded_triangular_solve_op.cc · +2 −1 · modified
    @@ -193,7 +193,8 @@ struct LaunchBatchBandedTriangularSolve {
     
         Shard(worker_threads.num_threads, worker_threads.workers, batch_size,
               cost_per_unit,
    -          [&in_x, &in_y, adjoint, lower, &bcast, out](int start, int limit) {
    +          [&in_x, &in_y, adjoint, lower, &bcast, out](int64 start,
    +                                                      int64 limit) {
                 SequentialBandedTriangularSolveKernel<Scalar>::Run(
                     in_x, in_y, lower, adjoint, bcast, out, start, limit);
               });
    
  • tensorflow/core/kernels/nth_element_op.cc · +2 −1 · modified
    @@ -95,7 +95,8 @@ struct NthElementFunctor<CPUDevice, T> {
         const int last_dim = input_tensor.dim_size(input_tensor.dims() - 1);
     
         // Allocate each row to different shard.
    -    auto SubNthElement = [&, input, output, last_dim, n](int start, int limit) {
    +    auto SubNthElement = [&, input, output, last_dim, n](int64 start,
    +                                                         int64 limit) {
           // std::nth_element would rearrange the array, so we need a new buffer.
           std::vector<T> buf(last_dim);
     
    
  • tensorflow/core/kernels/parameterized_truncated_normal_op.cc · +4 −4 · modified
    @@ -70,8 +70,8 @@ struct TruncatedNormalFunctor<CPUDevice, T> {
     
         auto do_work = [samples_per_batch, num_elements, &ctx, &means, &stddevs,
                         &minvals, &maxvals, &gen, &output,
    -                    kStdDevsInsideBoundsToUseRandnSampler](int start_batch,
    -                                                           int limit_batch) {
    +                    kStdDevsInsideBoundsToUseRandnSampler](int64 start_batch,
    +                                                           int64 limit_batch) {
           // Capturing "gen" by-value would only make a copy for the _shared_
           // lambda.  Since we want to let each worker have its own copy, we pass
           // "gen" by reference and explicitly do a copy assignment here.
    @@ -333,8 +333,8 @@ struct TruncatedNormalFunctorV2<CPUDevice, T> {
     
         auto do_work = [num_batches, samples_per_batch, &ctx, &bcast, &means,
                         &stddevs, &minvals, &maxvals, &gen, &output,
    -                    kStdDevsInsideBoundsToUseRandnSampler](int start_output,
    -                                                           int limit_output) {
    +                    kStdDevsInsideBoundsToUseRandnSampler](int64 start_output,
    +                                                           int64 limit_output) {
           // Capturing "gen" by-value would only make a copy for the _shared_
           // lambda.  Since we want to let each worker have its own copy, we pass
           // "gen" by reference and explicitly do a copy assignment here.
    
  • tensorflow/core/kernels/random_binomial_op.cc · +1 −1 · modified
    @@ -184,7 +184,7 @@ struct RandomBinomialFunctor<CPUDevice, T, U> {
         // the sample shape and [H1, ... Hm] for the batch shape of the samples.
         // We have B1 * ... * Bk samples per batch member we need.
         auto DoWork = [num_batches, samples_per_batch, &bcast, &counts, &probs,
    -                   &gen, &output](int start_output, int limit_output) {
    +                   &gen, &output](int64 start_output, int64 limit_output) {
           // Vectorized intermediate calculations for uniform rejection sampling.
           // We always generate at most 4 samples.
           Eigen::array<T, 4> z;
    
  • tensorflow/core/kernels/random_poisson_op.cc · +1 −1 · modified
    @@ -97,7 +97,7 @@ struct PoissonFunctor<CPUDevice, T, U> {
         typedef random::UniformDistribution<random::PhiloxRandom, CT> Uniform;
     
         auto DoWork = [num_samples, num_rate, &rng, samples_flat, rate_flat](
    -                      int start_output, int limit_output) {
    +                      int64 start_output, int64 limit_output) {
           // Capturing "rng" by value would only make a copy for the _shared_
           // lambda.  Since we want to let each worker have its own copy, we pass
           // "rng" by reference and explicitly do a copy assignment.
    
  • tensorflow/core/kernels/stateless_random_ops.cc · +1 −1 · modified
    @@ -252,7 +252,7 @@ class StatelessRandomGammaOp : public StatelessRandomOpBase {
         // avoid a couple flops which can be done on a per-alpha basis.
     
         auto DoWork = [samples_per_alpha, num_alphas, &random, samples_flat,
    -                   alpha_flat](int start_output, int limit_output) {
    +                   alpha_flat](int64 start_output, int64 limit_output) {
           // Capturing "random" by-value would only make a copy for the _shared_
           // lambda.  Since we want to let each worker have its own copy, we pass
           // "random" by reference and explicitly do a copy assignment.
    
  • tensorflow/core/kernels/topk_op.cc · +1 −1 · modified
    @@ -136,7 +136,7 @@ struct TopKFunctor<CPUDevice, T> {
           return Status::OK();
         }
     
    -    auto SortIndices = [&](int start_batch, int limit_batch) {
    +    auto SortIndices = [&](int64 start_batch, int64 limit_batch) {
           for (int32 b = start_batch; b < limit_batch; ++b) {
             const T* input_data = &input(b, 0);
             const auto stable_comp = [input_data](const int32 a, const int32 b) {
    
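Every hunk in this commit applies the same mechanical fix: widen the lambda's parameters from `int`/`int32` to `int64` so the 64-bit bounds arrive intact. A minimal sketch of the corrected pattern, again using the hypothetical `ShardLike` stand-in from above rather than the real API:

    #include <cstdint>
    #include <functional>
    #include <vector>

    // Same hypothetical stand-in as in the earlier sketch.
    void ShardLike(int64_t total,
                   const std::function<void(int64_t, int64_t)>& work) {
      work(0, total);
    }

    int main() {
      std::vector<float> output(1024, 1.0f);

      // Fixed pattern: the lambda's parameters match the callback type
      // exactly, so [start, limit) is never narrowed, no matter how much
      // work is being parallelized.
      auto do_work = [&output](int64_t start, int64_t limit) {
        for (int64_t i = start; i < limit; ++i) {
          output[static_cast<size_t>(i)] = 0.0f;
        }
      };
      ShardLike(static_cast<int64_t>(output.size()), do_work);
      return 0;
    }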
27b417360cbd

Prevent `int64` to `int` truncation in `Shard` API usage.

https://github.com/tensorflow/tensorflow · Mihai Maruseac · Sep 19, 2020 · via GHSA
1 file changed · +1 −1
  • tensorflow/core/kernels/random_op.cc · +1 −1 · modified
    @@ -202,7 +202,7 @@ class RandomGammaOp : public OpKernel {
         // avoid a couple flops which can be done on a per-alpha basis.
     
         auto DoWork = [samples_per_alpha, num_alphas, &rng, samples_flat,
    -                   alpha_flat](int start_output, int limit_output) {
    +                   alpha_flat](int64 start_output, int64 limit_output) {
           using Eigen::numext::exp;
           using Eigen::numext::log;
           using Eigen::numext::log1p;
    

Vulnerability mechanics

`Shard` invokes its work callback with 64-bit `[start, limit)` bounds. At each call site patched above, the callback was a lambda declared with `int` or `int32` parameters, so whenever the total amount of work exceeded the 32-bit range, the bounds were silently truncated. Depending on how the truncated values fed loop bounds and indexing arithmetic, workers could process the wrong ranges, yielding out-of-bounds reads and writes, segfaults, stack overflows, or corrupted results.

