@@ -425,24 +425,6 @@ def fn2(x):
         self.assertEqual(counters["inductor"]["fxgraph_cache_miss"], 2)
         self.assertEqual(counters["inductor"]["fxgraph_cache_hit"], 0)
 
-        # Now pretend the constants are frozen params.
-        counters.clear()
-        self.reset()
-
-        with mock.patch(
-            "torch._inductor.output_code.has_frozen_params", return_value=True
-        ):
-            # A call to fn1 should miss in the cache since we do not consider
-            # the constant values.
-            self.assertEqual(fn1(a), compiled_fn1(a))
-            self.assertEqual(counters["inductor"]["fxgraph_cache_miss"], 1)
-            self.assertEqual(counters["inductor"]["fxgraph_cache_hit"], 0)
-
-            # A call to fn2 should hit for the same reason.
-            self.assertEqual(fn2(a), compiled_fn2(a))
-            self.assertEqual(counters["inductor"]["fxgraph_cache_miss"], 1)
-            self.assertEqual(counters["inductor"]["fxgraph_cache_hit"], 1)
-
     @requires_cuda
     @config.patch({"fx_graph_cache": True})
     @config.patch({"fx_graph_remote_cache": False})
@@ -806,14 +788,28 @@ def f(x, val):
     @config.patch({"fx_graph_remote_cache": False})
     @config.patch({"freezing": True})
     @parametrize("device", (GPU_TYPE, "cpu"))
-    def test_freezing(self, device):
+    @parametrize("inlinable", (True, False))
+    def test_freezing(self, device, inlinable):
         if device == GPU_TYPE and not HAS_GPU:
             raise unittest.SkipTest(f"requires {GPU_TYPE}")
 
+        # For machines with mkldnn_fp16 support, weight_pack in mkldnn_fusion.py causes
+        # the creation of a mkldnn format tensor which the current implementation does
+        # not support.
+        if (
+            device == "cpu"
+            and torch.backends.mkldnn.is_available()
+            and torch.ops.mkldnn._is_mkldnn_fp16_supported()
+        ):
+            raise unittest.SkipTest("mkldnn tensors unsupported")
+
+        # The shape of the frozen constant determines if it will be inlined.
+        shape = (4,) if inlinable else (8, 8)
+
         class MM(torch.nn.Module):
             def __init__(self) -> None:
                 super().__init__()
-                self.param = torch.nn.Parameter(torch.rand(8, 8))
+                self.param = torch.nn.Parameter(torch.rand(shape))
 
             def forward(self, x):
                 return x @ self.param
@@ -823,71 +819,37 @@ def forward(self, x):
         # Populate a cache entry.
         mod1 = MM().to(device=device, dtype=dtype)
         with torch.no_grad():
-            x = torch.rand(8, 8).to(device=device, dtype=dtype)
+            x = torch.rand(shape).to(device=device, dtype=dtype)
             out0 = mod1(x)
             out1 = torch.compile(mod1)(x)
             self.assertEqual(out0, out1)
 
-        # For mahcine that has mkldnn_fp16 support, the weight_pack in mkldnn_fusion.py
-        # wroks, which result in mkldnn format tensor, then the exception
-        # BypassFxGraphCache("mkldnn tensors unpickleable") is raised, and cause the
-        # fxgraph not cached.
-        def is_cpu_mkldnn_fp16_supported():
-            return (
-                device == "cpu"
-                and torch.backends.mkldnn.is_available()
-                and torch.ops.mkldnn._is_mkldnn_fp16_supported()
-            )
-
-        if is_cpu_mkldnn_fp16_supported():
-            fxgraph_cache_bypass_cnt = 1
-            fxgraph_cache_miss_cnt = 0
-            fxgraph_cache_hit_cnt = 0
-        else:
-            fxgraph_cache_bypass_cnt = 0
-            fxgraph_cache_miss_cnt = 1
-            fxgraph_cache_hit_cnt = 0
-
-        self.assertEqual(
-            counters["inductor"]["fxgraph_cache_bypass"], fxgraph_cache_bypass_cnt
-        )
-        self.assertEqual(
-            counters["inductor"]["fxgraph_cache_miss"], fxgraph_cache_miss_cnt
-        )
-        self.assertEqual(
-            counters["inductor"]["fxgraph_cache_hit"], fxgraph_cache_hit_cnt
-        )
+        self.assertEqual(counters["inductor"]["fxgraph_cache_bypass"], 0)
+        self.assertEqual(counters["inductor"]["fxgraph_cache_miss"], 1)
+        self.assertEqual(counters["inductor"]["fxgraph_cache_hit"], 0)
 
         counters.clear()
         self.reset()
 
-        # Same nn.Module, but with different parameters should cache hit.
+        # Same nn.Module, but with different parameters. In the case that the param can
+        # be inlined, we should consider the actual tensor value and we expect a cache
+        # miss (because the values are different here). If the param cannot be inlined,
+        # then we consider only the tensor metadata and we expect a cache hit.
         mod2 = MM().to(device=device, dtype=dtype)
         self.assertNotEqual(mod1.param, mod2.param)
 
         with torch.no_grad():
-            x = torch.rand(8, 8).to(device=device, dtype=dtype)
+            x = torch.rand(shape).to(device=device, dtype=dtype)
             out0 = mod2(x)
             out1 = torch.compile(mod2)(x)
             self.assertEqual(out0, out1)
 
-        if is_cpu_mkldnn_fp16_supported():
-            fxgraph_cache_bypass_cnt = 1
-            fxgraph_cache_miss_cnt = 0
-            fxgraph_cache_hit_cnt = 0
-        else:
-            fxgraph_cache_bypass_cnt = 0
-            fxgraph_cache_miss_cnt = 0
-            fxgraph_cache_hit_cnt = 1
-
-        self.assertEqual(
-            counters["inductor"]["fxgraph_cache_bypass"], fxgraph_cache_bypass_cnt
-        )
+        self.assertEqual(counters["inductor"]["fxgraph_cache_bypass"], 0)
         self.assertEqual(
-            counters["inductor"]["fxgraph_cache_miss"], fxgraph_cache_miss_cnt
+            counters["inductor"]["fxgraph_cache_miss"], 1 if inlinable else 0
         )
         self.assertEqual(
-            counters["inductor"]["fxgraph_cache_hit"], fxgraph_cache_hit_cnt
+            counters["inductor"]["fxgraph_cache_hit"], 0 if inlinable else 1
         )
 
 
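
For reference (not part of the diff): a rough, standalone sketch of the behavior the new `inlinable` parametrization exercises, using the same config knobs (`fx_graph_cache`, `fx_graph_remote_cache`, `freezing`) and counters that appear in the test above. The helper name `miss_hit_counts` and the use of `torch._dynamo.reset()` in place of the test's `self.reset()` are assumptions for illustration only; exact counter values can vary with the build and backend (e.g. the mkldnn fp16 case the test skips).

```python
import torch
import torch._dynamo
from torch._dynamo.utils import counters
from torch._inductor import config


class MM(torch.nn.Module):
    def __init__(self, shape):
        super().__init__()
        self.param = torch.nn.Parameter(torch.rand(shape))

    def forward(self, x):
        return x @ self.param


def miss_hit_counts(shape):
    """Compile two freshly-initialized MM modules and report (misses, hits)."""
    counters.clear()
    torch._dynamo.reset()
    with config.patch(
        {"fx_graph_cache": True, "fx_graph_remote_cache": False, "freezing": True}
    ):
        for _ in range(2):
            mod = MM(shape)  # different random parameter values each iteration
            with torch.no_grad():
                torch.compile(mod)(torch.rand(shape))
            torch._dynamo.reset()  # force recompilation so the cache is consulted again
    return (
        counters["inductor"]["fxgraph_cache_miss"],
        counters["inductor"]["fxgraph_cache_hit"],
    )


# Per the test's expectations: a (4,) param is small enough to be inlined, so a
# different value misses the cache -> (2, 0); an (8, 8) param is not inlined, so
# only its metadata is hashed and the second compile hits -> (1, 1).
print(miss_hit_counts((4,)))
print(miss_hit_counts((8, 8)))
```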