Fix integer overflow bug in triu/tril for large diagonal values (pytorch#153240)

PenXLa · pytorchmergebot · commit 8e05749d5cde · 2025-09-15T18:07:19.000Z
This PR fixes a bug in the implementation of `apply_triu_tril_single` where using extremely large values for the diagonal argument (e.g. `diagonal=9223372036854775807`) could result in integer overflow and incorrect results. The fix clamps the diagonal offset `k` to the range `[-n, m]` before the masking loops run, so the `i + k` index arithmetic can no longer overflow even for large or extreme diagonal values.

Example of the original incorrect behavior:

```python
a = torch.ones(5,5)
torch.triu(a, 9223372036854775807)
# Before:
# tensor([[0., 0., 0., 0., 0.],
#         [1., 1., 1., 1., 1.],
#         [1., 1., 1., 1., 1.],
#         [1., 1., 1., 1., 1.],
#         [1., 1., 1., 1., 1.]])
```

With the fix, the same call returns an all-zero tensor. The new implementation guards against overflow and produces correct results for all valid input values.

Pull Request resolved: pytorch#153240
Approved by: https://github.com/albanD
diff --git a/aten/src/ATen/native/TriangularOps.cpp b/aten/src/ATen/native/TriangularOps.cpp
@@ -52,6 +52,7 @@ void apply_triu_tril_single(
     int64_t self_col_stride,
     bool upper) {
   constexpr int64_t zero = 0;
+  k = std::clamp(k, -n, m); // Clamp k to [-n, m] to prevent i + k arithmetic overflow, especially if k approaches INT64_MAX/INT64_MIN.
 
   if (upper) {
     parallel_for(0, n, 0, [&](int64_t start, int64_t end) {
diff --git a/test/test_linalg.py b/test/test_linalg.py
@@ -9840,6 +9840,39 @@ def test_matmul_mv(self, device, dtype):
         C = torch.matmul(A, B)
         self.assertEqual(C, B.sum().expand(B.shape))
 
+    @dtypes(*all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16))
+    def test_triu_tril_extreme_k_values(self, device, dtype):
+        """
+        Test triu/tril with extreme k values to verify overflow fix.
+        Regression test for https://github.com/pytorch/pytorch/pull/153240
+        """
+        # Single square 5x5 input, reused for both the INT64_MAX and INT64_MIN cases
+        a = make_tensor((5, 5), dtype=dtype, device=device)
+
+        # Case 1: k = INT64_MAX, the largest diagonal offset an int64 can hold
+        k_max = 9223372036854775807
+        result_triu_max = torch.triu(a, k_max)
+        result_tril_max = torch.tril(a, k_max)
+
+        # The diagonal lies beyond the last column, so no element is on or above it: triu is all zeros.
+        # Every element is below that diagonal, so tril must return the input unchanged.
+        expected_triu_max = torch.zeros_like(a)
+        expected_tril_max = a.clone()
+        self.assertEqual(result_triu_max, expected_triu_max)
+        self.assertEqual(result_tril_max, expected_tril_max)
+
+        # Case 2: k = INT64_MIN, the smallest diagonal offset an int64 can hold
+        k_min = -9223372036854775808
+        result_triu_min = torch.triu(a, k_min)
+        result_tril_min = torch.tril(a, k_min)
+
+        # The diagonal lies before the first column of every row, so every element is on or above it:
+        # triu must return the input unchanged, and tril (nothing on or below the diagonal) is all zeros.
+        expected_triu_min = a.clone()
+        expected_tril_min = torch.zeros_like(a)
+        self.assertEqual(result_triu_min, expected_triu_min)
+        self.assertEqual(result_tril_min, expected_tril_min)
+
+
     @dtypes(torch.float, torch.double)
     @precisionOverride({torch.float32: 1e-4})
     def test_1_sized_with_0_strided(self, device, dtype):