Skip to content

Commit 1d7397a

Browse files
DDEle authored and pytorchmergebot committed
[Inductor] Avoid tensor slice overflow for large step (pytorch#147433)
Fixes pytorch#147071. Currently, if step is a value very close to INT64_MAX, the calculation of the slice output length will overflow. This PR fixes this problem and thus fixes pytorch#147071. Pull Request resolved: pytorch#147433. Approved by: https://github.com/leslie-fang-intel, https://github.com/jansel
1 parent 9c506aa commit 1d7397a

File tree

5 files changed

+32
-4
lines changed

5 files changed

+32
-4
lines changed

aten/src/ATen/native/TensorShape.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -3059,7 +3059,8 @@ Tensor slice(
30593059
}
30603060
auto storage_offset = self.storage_offset() + start_val * strides[dim];
30613061
auto len = end_val - start_val;
3062-
sizes[dim] = (len + step - 1) / step; // round-up
3062+
sizes[dim] =
3063+
(len == 0) ? 0 : (1 + (len - 1) / step); // round-up, avoiding overflow
30633064
strides[dim] *= step;
30643065

30653066
Tensor result;

test/expect/HasDecompTest.test_has_decomposition.expect

-2
Original file line numberDiff line numberDiff line change
@@ -1167,8 +1167,6 @@ aten::set_
11671167
aten::set_.source_Storage
11681168
aten::set_.source_Storage_storage_offset
11691169
aten::set_.source_Tensor
1170-
aten::slice_copy.Tensor
1171-
aten::slice_copy.Tensor_out
11721170
aten::slice_inverse
11731171
aten::slow_conv3d_forward
11741172
aten::slow_conv3d_forward.output

test/inductor/test_torchinductor.py

+11
Original file line numberDiff line numberDiff line change
@@ -12527,6 +12527,17 @@ def f(x):
1252712527
ms = do_bench(lambda: opt_f(x))
1252812528
print(f"{ms=:.3f}")
1252912529

12530+
def test_slice_overflow(self):
    # Regression test for https://github.com/pytorch/pytorch/issues/147071:
    # a step close to INT64_MAX used to overflow the computation of the
    # slice output length. Compiled output must match eager.
    def fn(x):
        sliced = torch.slice_copy(
            x, dim=0, start=449, end=None, step=9223372036854775807
        )
        return torch.reciprocal(sliced)

    x = torch.randn((875,))
    self.assertEqual(torch.compile(fn)(x), fn(x))
1253012541
@torch._inductor.config.patch("graph_partition", True)
1253112542
def test_graph_partition_no_inputs(self):
1253212543
def foo():

torch/_decomp/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -477,6 +477,7 @@ def _core_aten_decompositions_post_autograd() -> dict[
477477
aten.sinc,
478478
aten.sinc_,
479479
aten.slice_backward,
480+
aten.slice_copy,
480481
aten.smooth_l1_loss,
481482
aten.smooth_l1_loss_backward,
482483
aten.soft_margin_loss,

torch/_decomp/decompositions.py

+18-1
Original file line numberDiff line numberDiff line change
@@ -748,7 +748,7 @@ def slice_forward(
748748

749749
storage_offset = self.storage_offset() + start_val * strides[dim]
750750
len = end_val - start_val
751-
sizes[dim] = (len + step - 1) // step
751+
sizes[dim] = -(len // -step) # round-up, avoiding overflow
752752
strides[dim] *= step
753753

754754
if self.is_quantized:
@@ -759,6 +759,23 @@ def slice_forward(
759759
return self.as_strided(sizes, strides, storage_offset)
760760

761761

762+
@register_decomposition([aten.slice_copy.Tensor, aten.slice_copy.Tensor_out])
def slice_copy(
    self: Tensor,
    dim: int = 0,
    start: Optional[int] = None,
    end: Optional[int] = None,
    step: int = 1,
    out: Optional[Tensor] = None,
):
    """Decomposition of aten.slice_copy: a contiguous copy of a slice.

    Reuses slice_forward to compute the strided view, then clones it so
    the result owns its own (contiguous) storage. When ``out`` is given,
    the copy is written into it instead of being returned directly.
    """
    view = slice_forward(self, dim, start, end, step)
    copied = view.clone(memory_format=torch.contiguous_format)
    if out is None:
        return copied
    return _safe_copy_out(copy_from=copied, copy_to=out, exact_dtype=True)
777+
778+
762779
def _normalize_start_end(
763780
x: Tensor, dim: int, start: Optional[int], end: Optional[int]
764781
) -> tuple[int, int]:

0 commit comments

Comments
 (0)