Integer truncation in Shard API usage
Description
In TensorFlow before versions 1.15.4, 2.0.3, 2.1.2, 2.2.1 and 2.3.1, the Shard API expects the last argument to be a function taking two int64 (i.e., long long) arguments. However, there are several places in TensorFlow where a lambda taking int or int32 arguments is used instead. In these cases, if the amount of work to be parallelized is large enough, integer truncation occurs. Depending on how the two arguments of the lambda are used, this can result in segfaults, reads/writes outside of heap-allocated arrays, stack overflows, or data corruption. The issue is patched in commits 27b417360cbd671ef55915e4bb6bb06af8b8a832 and ca8c013b5e97b1373b3bb1c97ea655e69f31a575, and the fix is released in TensorFlow versions 1.15.4, 2.0.3, 2.1.2, 2.2.1, and 2.3.1.
Affected packages
Versions sourced from the GitHub Security Advisory.
| Package | Affected versions | Patched versions |
|---|---|---|
| tensorflow (PyPI) | < 1.15.4 | 1.15.4 |
| tensorflow (PyPI) | >= 2.0.0, < 2.0.3 | 2.0.3 |
| tensorflow (PyPI) | >= 2.1.0, < 2.1.2 | 2.1.2 |
| tensorflow (PyPI) | >= 2.2.0, < 2.2.1 | 2.2.1 |
| tensorflow (PyPI) | >= 2.3.0, < 2.3.1 | 2.3.1 |
| tensorflow-cpu (PyPI) | < 1.15.4 | 1.15.4 |
| tensorflow-cpu (PyPI) | >= 2.0.0, < 2.0.3 | 2.0.3 |
| tensorflow-cpu (PyPI) | >= 2.1.0, < 2.1.2 | 2.1.2 |
| tensorflow-cpu (PyPI) | >= 2.2.0, < 2.2.1 | 2.2.1 |
| tensorflow-cpu (PyPI) | >= 2.3.0, < 2.3.1 | 2.3.1 |
| tensorflow-gpu (PyPI) | < 1.15.4 | 1.15.4 |
| tensorflow-gpu (PyPI) | >= 2.0.0, < 2.0.3 | 2.0.3 |
| tensorflow-gpu (PyPI) | >= 2.1.0, < 2.1.2 | 2.1.2 |
| tensorflow-gpu (PyPI) | >= 2.2.0, < 2.2.1 | 2.2.1 |
| tensorflow-gpu (PyPI) | >= 2.3.0, < 2.3.1 | 2.3.1 |
Affected products
1
- Range: < 1.15.4
Patches
2
ca8c013b5e97: Prevent integer truncation from 64 to 32 bits.
9 files changed · +17 −15
tensorflow/core/kernels/boosted_trees/prediction_ops.cc+3 −3 modified@@ -121,7 +121,7 @@ class BoostedTreesTrainingPredictOp : public OpKernel { auto do_work = [&resource, &bucketized_features, &cached_tree_ids, &cached_node_ids, &output_partial_logits, &output_node_ids, latest_tree, - this](int32 start, int32 end) { + this](int64 start, int64 end) { for (int32 i = start; i < end; ++i) { int32 tree_id = cached_tree_ids(i); int32 node_id = cached_node_ids(i); @@ -237,7 +237,7 @@ class BoostedTreesPredictOp : public OpKernel { const int32 last_tree = resource->num_trees() - 1; auto do_work = [&resource, &bucketized_features, &output_logits, last_tree, - this](int32 start, int32 end) { + this](int64 start, int64 end) { for (int32 i = start; i < end; ++i) { std::vector<float> tree_logits(logits_dimension_, 0.0); int32 tree_id = 0; @@ -340,7 +340,7 @@ class BoostedTreesExampleDebugOutputsOp : public OpKernel { // path. Note: feature_ids has one less value than logits_path because the // first value of each logit path will be the bias. auto do_work = [&resource, &bucketized_features, &output_debug_info, - last_tree](int32 start, int32 end) { + last_tree](int64 start, int64 end) { for (int32 i = start; i < end; ++i) { // Proto to store debug outputs, per example. boosted_trees::DebugOutput example_debug_info;
tensorflow/core/kernels/image/crop_and_resize_op.cc+2 −2 modified@@ -223,7 +223,7 @@ struct CropAndResize<CPUDevice, T> { const int depth = crops.dimension(3); // Sharding across boxes. - auto CropAndResizePerBox = [&](int start_box, int limit_box) { + auto CropAndResizePerBox = [&](int64 start_box, int64 limit_box) { for (int b = start_box; b < limit_box; ++b) { const float y1 = boxes(b, 0); const float x1 = boxes(b, 1); @@ -449,7 +449,7 @@ struct CropAndResizeBackpropImage<CPUDevice, T> { grads_image.setZero(); - auto CropAndResizeBackImgPerBox = [&](int start_box, int limit_box) { + auto CropAndResizeBackImgPerBox = [&](int64 start_box, int64 limit_box) { for (int b = start_box; b < limit_box; ++b) { const float y1 = boxes(b, 0); const float x1 = boxes(b, 1);
tensorflow/core/kernels/linalg/banded_triangular_solve_op.cc+2 −1 modified@@ -193,7 +193,8 @@ struct LaunchBatchBandedTriangularSolve { Shard(worker_threads.num_threads, worker_threads.workers, batch_size, cost_per_unit, - [&in_x, &in_y, adjoint, lower, &bcast, out](int start, int limit) { + [&in_x, &in_y, adjoint, lower, &bcast, out](int64 start, + int64 limit) { SequentialBandedTriangularSolveKernel<Scalar>::Run( in_x, in_y, lower, adjoint, bcast, out, start, limit); });
tensorflow/core/kernels/nth_element_op.cc+2 −1 modified@@ -95,7 +95,8 @@ struct NthElementFunctor<CPUDevice, T> { const int last_dim = input_tensor.dim_size(input_tensor.dims() - 1); // Allocate each row to different shard. - auto SubNthElement = [&, input, output, last_dim, n](int start, int limit) { + auto SubNthElement = [&, input, output, last_dim, n](int64 start, + int64 limit) { // std::nth_element would rearrange the array, so we need a new buffer. std::vector<T> buf(last_dim);
tensorflow/core/kernels/parameterized_truncated_normal_op.cc+4 −4 modified@@ -70,8 +70,8 @@ struct TruncatedNormalFunctor<CPUDevice, T> { auto do_work = [samples_per_batch, num_elements, &ctx, &means, &stddevs, &minvals, &maxvals, &gen, &output, - kStdDevsInsideBoundsToUseRandnSampler](int start_batch, - int limit_batch) { + kStdDevsInsideBoundsToUseRandnSampler](int64 start_batch, + int64 limit_batch) { // Capturing "gen" by-value would only make a copy for the _shared_ // lambda. Since we want to let each worker have its own copy, we pass // "gen" by reference and explicitly do a copy assignment here. @@ -333,8 +333,8 @@ struct TruncatedNormalFunctorV2<CPUDevice, T> { auto do_work = [num_batches, samples_per_batch, &ctx, &bcast, &means, &stddevs, &minvals, &maxvals, &gen, &output, - kStdDevsInsideBoundsToUseRandnSampler](int start_output, - int limit_output) { + kStdDevsInsideBoundsToUseRandnSampler](int64 start_output, + int64 limit_output) { // Capturing "gen" by-value would only make a copy for the _shared_ // lambda. Since we want to let each worker have its own copy, we pass // "gen" by reference and explicitly do a copy assignment here.
tensorflow/core/kernels/random_binomial_op.cc+1 −1 modified@@ -184,7 +184,7 @@ struct RandomBinomialFunctor<CPUDevice, T, U> { // the sample shape and [H1, ... Hm] for the batch shape of the samples. // We have B1 * ... * Bk samples per batch member we need. auto DoWork = [num_batches, samples_per_batch, &bcast, &counts, &probs, - &gen, &output](int start_output, int limit_output) { + &gen, &output](int64 start_output, int64 limit_output) { // Vectorized intermediate calculations for uniform rejection sampling. // We always generate at most 4 samples. Eigen::array<T, 4> z;
tensorflow/core/kernels/random_poisson_op.cc+1 −1 modified@@ -97,7 +97,7 @@ struct PoissonFunctor<CPUDevice, T, U> { typedef random::UniformDistribution<random::PhiloxRandom, CT> Uniform; auto DoWork = [num_samples, num_rate, &rng, samples_flat, rate_flat]( - int start_output, int limit_output) { + int64 start_output, int64 limit_output) { // Capturing "rng" by value would only make a copy for the _shared_ // lambda. Since we want to let each worker have its own copy, we pass // "rng" by reference and explicitly do a copy assignment.
tensorflow/core/kernels/stateless_random_ops.cc+1 −1 modified@@ -252,7 +252,7 @@ class StatelessRandomGammaOp : public StatelessRandomOpBase { // avoid a couple flops which can be done on a per-alpha basis. auto DoWork = [samples_per_alpha, num_alphas, &random, samples_flat, - alpha_flat](int start_output, int limit_output) { + alpha_flat](int64 start_output, int64 limit_output) { // Capturing "random" by-value would only make a copy for the _shared_ // lambda. Since we want to let each worker have its own copy, we pass // "random" by reference and explicitly do a copy assignment.
tensorflow/core/kernels/topk_op.cc+1 −1 modified@@ -136,7 +136,7 @@ struct TopKFunctor<CPUDevice, T> { return Status::OK(); } - auto SortIndices = [&](int start_batch, int limit_batch) { + auto SortIndices = [&](int64 start_batch, int64 limit_batch) { for (int32 b = start_batch; b < limit_batch; ++b) { const T* input_data = &input(b, 0); const auto stable_comp = [input_data](const int32 a, const int32 b) {
27b417360cbd: Prevent `int64` to `int` truncation in `Shard` API usage.
1 file changed · +1 −1
tensorflow/core/kernels/random_op.cc+1 −1 modified@@ -202,7 +202,7 @@ class RandomGammaOp : public OpKernel { // avoid a couple flops which can be done on a per-alpha basis. auto DoWork = [samples_per_alpha, num_alphas, &rng, samples_flat, - alpha_flat](int start_output, int limit_output) { + alpha_flat](int64 start_output, int64 limit_output) { using Eigen::numext::exp; using Eigen::numext::log; using Eigen::numext::log1p;
Vulnerability mechanics
Generated by null/stub on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.
References
10
- lists.opensuse.org/opensuse-security-announce/2020-10/msg00065.html (ghsa, vendor-advisory, x_refsource_SUSE, WEB)
- github.com/advisories/GHSA-h6fg-mjxg-hqq4 (ghsa, ADVISORY)
- nvd.nist.gov/vuln/detail/CVE-2020-15202 (ghsa, ADVISORY)
- github.com/pypa/advisory-database/tree/main/vulns/tensorflow-cpu/PYSEC-2020-282.yaml (ghsa, WEB)
- github.com/pypa/advisory-database/tree/main/vulns/tensorflow-gpu/PYSEC-2020-317.yaml (ghsa, WEB)
- github.com/pypa/advisory-database/tree/main/vulns/tensorflow/PYSEC-2020-125.yaml (ghsa, WEB)
- github.com/tensorflow/tensorflow/commit/27b417360cbd671ef55915e4bb6bb06af8b8a832 (ghsa, x_refsource_MISC, WEB)
- github.com/tensorflow/tensorflow/commit/ca8c013b5e97b1373b3bb1c97ea655e69f31a575 (ghsa, x_refsource_MISC, WEB)
- github.com/tensorflow/tensorflow/releases/tag/v2.3.1 (ghsa, x_refsource_MISC, WEB)
- github.com/tensorflow/tensorflow/security/advisories/GHSA-h6fg-mjxg-hqq4 (ghsa, x_refsource_CONFIRM, WEB)
News mentions
0
No linked articles in our index yet.