Skip to content

Commit 46a7b0c

Browse files
committed
Merge remote-tracking branch 'origin/fix_ci' into add_h100_ci
2 parents 6dd36b3 + 08bccdc commit 46a7b0c

File tree

2 files changed

+1
-9
lines changed

2 files changed

+1
-9
lines changed

test/integration/test_integration.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1821,7 +1821,7 @@ def test_autoquant_int4wo(self, device, dtype):
18211821
self.assertGreater(compute_error(ref, out), 20)
18221822

18231823
@parameterized.expand(COMMON_DEVICE_DTYPE)
1824-
@unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
1824+
@unittest.skipIf(not is_sm_at_least_90(), "Need CUDA arch at least SM90")
18251825
@unittest.skipIf(
18261826
not TORCH_VERSION_AT_LEAST_2_5, "autoquant int4 option requires 2.5+."
18271827
)

torchao/quantization/qat/utils.py

-8
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,6 @@
1616
_get_per_token_block_size,
1717
)
1818

19-
# Attribute name representing the forward prehook wrapping the
20-
# linear input in an `AffineFakeQuantizedTensor` on a linear module.
21-
#
22-
# The value of this attribute is a 2-tuple of (prehook, handle).
23-
# The prehook can be disabled by calling `handle.remove()`, and
24-
# re-enabled by calling `module.register_forward_pre_hook(prehook)`.
25-
_QAT_LINEAR_SUBCLASS_INPUT_PREHOOK = "_qat_linear_subclass_input_prehook"
26-
2719

2820
class _GenericFakeQuantize(torch.autograd.Function):
2921
"""

0 commit comments

Comments
 (0)