@@ -33,6 +33,7 @@
     float8_dynamic_activation_float8_weight,
     float8_static_activation_float8_weight,
     float8_weight_only,
+    int4_dynamic_activation_int4_weight,
     int4_weight_only,
     int8_dynamic_activation_int4_weight,
     int8_dynamic_activation_int8_weight,
@@ -50,6 +51,7 @@
     TORCH_VERSION_AT_LEAST_2_5,
     TORCH_VERSION_AT_LEAST_2_6,
     is_sm_at_least_89,
+    is_sm_at_least_90,
     unwrap_tensor_subclass,
 )
 
@@ -798,6 +800,10 @@ def test_int4wo_cpu(self, dtype, x_dim):
             float8_weight_only(),
             float8_dynamic_activation_float8_weight(),
             float8_static_activation_float8_weight(scale=torch.tensor([1.0])),
+            int4_dynamic_activation_int4_weight(),
+            int8_dynamic_activation_int8_weight(),
+            int8_dynamic_activation_int4_weight(),
+            int8_weight_only(),
         ],
     )
     def test_workflow_e2e_numerics(self, config):
@@ -816,6 +822,11 @@ def test_workflow_e2e_numerics(self, config):
             and not is_sm_at_least_89()
         ):
             return unittest.skip("requires CUDA capability 8.9 or greater")
+        elif (
+            isinstance(config, int4_dynamic_activation_int4_weight)
+            and is_sm_at_least_90()
+        ):
+            return unittest.skip("only supported on CUDA capability 8.9, not greater")
 
         # scale has to be moved to cuda here because the parametrization init
         # code happens before gating for cuda availability
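
For readers outside the torchao tree: is_sm_at_least_89 / is_sm_at_least_90 gate tests on GPU compute capability (SM 8.9 is Ada Lovelace, SM 9.0 is Hopper). A minimal sketch of such a helper using only core PyTorch; the name sm_at_least is hypothetical, not torchao's actual utility:

import torch

def sm_at_least(major: int, minor: int) -> bool:
    # True if the current CUDA device's compute capability is >= (major, minor).
    if not torch.cuda.is_available():
        return False
    dev_major, dev_minor = torch.cuda.get_device_capability()
    return (dev_major, dev_minor) >= (major, minor)

# Mirrors the gate in the hunk above: int4_dynamic_activation_int4_weight is
# skipped on SM 9.0+ because it is only supported on capability 8.9.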
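The newly added configs flow through the same parametrized end-to-end numerics check as the float8 ones. A rough sketch of what that check exercises, using torchao's quantize_ API; the toy model, input, and SQNR threshold below are illustrative assumptions, not the test's actual values:

import torch
from torchao.quantization import int8_weight_only, quantize_

# Toy model and input (illustrative; the real test uses its own module/dtype).
model = torch.nn.Sequential(torch.nn.Linear(128, 128)).eval()
x = torch.randn(1, 128)

y_ref = model(x)                      # float reference output
quantize_(model, int8_weight_only())  # swap weights for int8 tensor subclasses
y_q = model(x)

# End-to-end numerics: the quantized output should track the float reference.
sqnr = 10 * torch.log10(y_ref.square().mean() / (y_ref - y_q).square().mean())
assert sqnr > 20  # loose, assumed threshold in dB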