
Commit 9c506aa

ColinPeppler authored and pytorchmergebot committed
[aotinductor] add option to disable runtime assertions (pytorch#146462)
A recent user experience went like this:

* The user runs AOTI lowering, and it succeeds.
* They take the AOTI model and run it with some sample inputs. Everything runs well.
* Then they boot up a serving test that loads the AOTI model and runs it with a set of sample requests.
* They see that some of the requests fail. The logs show them this:
  * `AOTInductorModel run failed with input spec: [1, 32]:c10::BFloat16, [2]:long ...`
  * `Error: u45 >= 2`
* To the untrained eye, "AOTInductorModel run failed" is all they see. But the true reason is `Error: u45 >= 2`.

However, the assertion isn't always correct. In fact, u45 can actually be 0. So why did AOTI say u45 >= 2? It's a two-piece combo:

* With 0/1 specialization, the ShapeEnv creates symbolic shapes (e.g. s0) with a default value range of [2, inf].
* In the graph, Dynamo traces torch.mul(A, B) where A is [s0, ...] and B is [u45, ...]. So Dynamo learns Eq(s0, u45).
* Therefore, u45 also has a range of [2, inf]. Hence the incorrect runtime assertion.

So the motivation for this PR is to add an option to disable runtime assertions if you run into a situation like this. Another way to avoid it is to call `mark_unbacked()` on all the dynamic dims, as sketched below.

@diff-train-skip-merge

Pull Request resolved: pytorch#146462
Approved by: https://github.com/desertfire, https://github.com/22quinn
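As a minimal sketch of that `mark_unbacked()` workaround (the toy module `M`, the input shapes, and the choice of dim 0 are hypothetical, and the exact export/packaging entry points depend on your workflow):

```python
import torch
import torch._dynamo
import torch._inductor

# Hypothetical toy module standing in for the user's model.
class M(torch.nn.Module):
    def forward(self, a, b):
        # Ops like this are where Dynamo learns Eq(s0, u45)-style equalities.
        return torch.mul(a, b)

a = torch.randn(8, 4)
b = torch.randn(8, 4)

# mark_unbacked allocates an unbacked symbol for dim 0, sidestepping the
# default [2, inf] value range that 0/1 specialization would otherwise imply.
torch._dynamo.decorators.mark_unbacked(a, 0)
torch._dynamo.decorators.mark_unbacked(b, 0)

ep = torch.export.export(M(), (a, b))
pkg = torch._inductor.aoti_compile_and_package(ep)  # AOTI lowering
```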
1 parent 26358fa · commit 9c506aa

3 files changed: +10 −1 lines changed

test/inductor/test_aot_inductor.py (+6)

@@ -4141,6 +4141,12 @@ def forward(self, a, b, c):
         unexpected_inputs = (torch.ones(0, device=self.device), b, c)
         compiled(*unexpected_inputs)
 
+        # Try it again without runtime assertions.
+        with config.patch({"scalar_asserts": False}):
+            AOTIRunnerUtil.run_multiple(
+                self.device, model, [example_inputs, unexpected_inputs]
+            )
+
     def test_none_args_aot_codegen(self):
         if self.device != GPU_TYPE:
             raise unittest.SkipTest("requires GPU")

torch/_inductor/config.py (+1)

@@ -148,6 +148,7 @@ def prologue_fusion_enabled() -> bool:
 # put correctness assertions in generated code
 size_asserts = os.environ.get("TORCHINDUCTOR_SIZE_ASSERTS", "1") == "1"
 nan_asserts = os.environ.get("TORCHINDUCTOR_NAN_ASSERTS") == "1"
+scalar_asserts = os.environ.get("TORCHINDUCTOR_SCALAR_ASSERTS", "1") == "1"
 
 # enable loop reordering based on input orders
 pick_loop_orders = True
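For reference, a minimal sketch of the two ways this new toggle can be flipped (the config name and env var come from the diff above; the surrounding lowering code is left as a placeholder):

```python
import torch._inductor.config as inductor_config

# Option 1: process-wide, via the environment variable read above. It must
# be set before torch._inductor.config is first imported:
#   TORCHINDUCTOR_SCALAR_ASSERTS=0 python lower_model.py

# Option 2: scoped, as the new test does -- patch the config around lowering.
with inductor_config.patch({"scalar_asserts": False}):
    # Run AOTI lowering here; the generated code omits the scalar runtime
    # asserts whose codegen is guarded in the ir.py change below.
    pass
```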

torch/_inductor/ir.py (+3 −1)

@@ -6333,14 +6333,16 @@ def get_unbacked_symbol_uses(self):  # type: ignore[no-untyped-def]
         return free_unbacked_symbols(self.scalar)
 
     def codegen(self, wrapper) -> None:  # type: ignore[no-untyped-def]
+        if not config.scalar_asserts:
+            return
         # NB: It is EXTREMELY important not to simplify the scalar under assertion here,
         # because simplify is done with respect to runtime asserts. So if you have
         # "u0 == 0" in the runtime asserts, if you subsequently try to
         # simplify(u0 == 0), you will get True (because we've already runtime assert'ed
         # that it's true). But we're code generating the actual runtime assert here!!
         symbol = next(iter(self.get_unbacked_symbol_uses()))
-        symbol_str = f"std::to_string({symbol})"
         if V.graph.cpp_wrapper:
+            symbol_str = f"std::to_string({symbol})"
             sizevar = V.graph.wrapper_code.codegen_cpp_sizevar(
                 self.scalar, simplify=False
             )
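The NB comment is the subtle part of this method. As a toy illustration of the pitfall it describes (a standalone sympy sketch, not Inductor's actual ShapeEnv machinery):

```python
import sympy

u0 = sympy.Symbol("u0", integer=True)
runtime_asserts = {sympy.Eq(u0, 0)}  # already recorded as "known true"

def simplify_wrt_asserts(expr, asserts):
    # Mimics simplification "with respect to runtime asserts": any
    # expression that has already been asserted folds to True.
    return sympy.true if expr in asserts else expr

expr = sympy.Eq(u0, 0)  # the very assert we are about to code-generate
print(simplify_wrt_asserts(expr, runtime_asserts))  # True -> assert vanishes
print(expr)                                         # Eq(u0, 0) -> must be emitted
```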
