@@ -1,28 +1,37 @@
 import torch
-import torch.nn.functional as F
 
+from torchao.dtypes import (
+    TensorCoreTiledLayout,
+    to_affine_quantized_intx,
+)
+from torchao.dtypes.uintx.uintx_layout import _DTYPE_TO_BIT_WIDTH, UintxLayout
+from torchao.quantization import to_weight_tensor_with_linear_activation_scale_metadata
 from torchao.quantization.granularity import PerGroup
+from torchao.quantization.quant_api import _replace_with_custom_fn_if_matches_filter
 from torchao.quantization.quant_primitives import (
+    _DTYPE_TO_QVALUE_BOUNDS,
     MappingType,
     ZeroPointDomain,
-    _DTYPE_TO_QVALUE_BOUNDS,
 )
-from torchao.quantization import to_weight_tensor_with_linear_activation_scale_metadata
-from torchao.quantization.quant_api import _replace_with_custom_fn_if_matches_filter
-from torchao.dtypes.uintx.uintx_layout import _DTYPE_TO_BIT_WIDTH, UintxLayout
-from torchao.dtypes import (
-    to_affine_quantized_intx,
-    TensorCoreTiledLayout,
+
+from .core import (
+    AWQObservedLinear,
+    AWQObserver,
 )
-from .core import (
-    AWQObserver,
-    AWQObservedLinear,
-)
 
+assert (
+    len(_DTYPE_TO_BIT_WIDTH) > 0
+), "Error importing low bit torch.uint dtypes. Please upgrade to torch 2.3+"
 
-assert len(_DTYPE_TO_BIT_WIDTH) > 0, "Error importing low bit torch.uint dtypes. Please upgrade to torch 2.3+"
 
-def insert_awq_observer_(model: torch.nn.Module, n_validation_examples: int, validation_sequence_len: int, quant_dtype: torch.dtype = torch.uint4, scale_search_space_size: int = 20, group_size: int = 128):
+def insert_awq_observer_(
+    model: torch.nn.Module,
+    n_validation_examples: int,
+    validation_sequence_len: int,
+    quant_dtype: torch.dtype = torch.uint4,
+    scale_search_space_size: int = 20,
+    group_size: int = 128,
+):
     """
     Inserts AWQObserver into Linear layers of a given model.
 
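The import-time assert in this hunk guards the low-bit `torch.uintN` dtypes behind `_DTYPE_TO_BIT_WIDTH`. As a quick illustration of how the `[quant_min, quant_max]` range used later is derived from the bit width — a minimal sketch, assuming torch 2.3+ where `torch.uint1` .. `torch.uint7` exist:

```python
import torch
from torchao.dtypes.uintx.uintx_layout import _DTYPE_TO_BIT_WIDTH

# An unsigned b-bit integer represents 0 .. 2**b - 1, which is exactly
# the asymmetric [quant_min, quant_max] range computed in this file.
for dtype, bits in sorted(_DTYPE_TO_BIT_WIDTH.items(), key=lambda kv: kv[1]):
    print(dtype, "->", (0, 2**bits - 1))  # e.g. torch.uint4 -> (0, 15)
```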
@@ -35,58 +44,75 @@ def insert_awq_observer_(model: torch.nn.Module, n_validation_examples: int, val
         group_size: Quantization granularity. Use -1 for channel wise quantization
     """
     _is_linear = lambda m, fqn: isinstance(m, torch.nn.Linear)
-    assert quant_dtype in _DTYPE_TO_BIT_WIDTH or quant_dtype == torch.uint8, "Invalid quant_dtype. Please use torch.uint1 .. torch.uint8"
+    assert (
+        quant_dtype in _DTYPE_TO_BIT_WIDTH or quant_dtype == torch.uint8
+    ), "Invalid quant_dtype. Please use torch.uint1 .. torch.uint8"
     # AQT config
     mapping_type = MappingType.ASYMMETRIC
     quantization_granularity = PerGroup(group_size)
     quant_min = 0
-    quant_max = 255 if quant_dtype == torch.uint8 else 2 ** _DTYPE_TO_BIT_WIDTH[quant_dtype] - 1
+    quant_max = (
+        255 if quant_dtype == torch.uint8 else 2 ** _DTYPE_TO_BIT_WIDTH[quant_dtype] - 1
+    )
     eps = torch.finfo(torch.float32).eps
     preserve_zero = True
     zero_point_dtype = torch.int64
     zero_point_domain = ZeroPointDomain.INT
-
 
     def replace_with_observer(layer):
         # creates observer and replaces linear layers with AWQObservedLinear layers
         observer = AWQObserver(
             layer.weight,
-            layer.bias,
-            quantization_granularity,
+            layer.bias,
+            quantization_granularity,
             mapping_type,
-            quant_dtype,
+            quant_dtype,
             n_validation_examples,
             validation_sequence_len,
             scale_search_space_size,
-            preserve_zero = preserve_zero,
-            zero_point_domain = zero_point_domain,
-            zero_point_dtype = zero_point_dtype,
+            preserve_zero=preserve_zero,
+            zero_point_domain=zero_point_domain,
+            zero_point_dtype=zero_point_dtype,
             quant_min=quant_min,
-            quant_max = quant_max,
-            eps = eps)
+            quant_max=quant_max,
+            eps=eps,
+        )
         return AWQObservedLinear.from_float(layer, observer)
+
     _replace_with_custom_fn_if_matches_filter(model, replace_with_observer, _is_linear)
 
+
 def _observed_linear_subclass_inserter(constructor):
     """
     Replaces unquantized AWQObservedLinear instances with quantized linear instances.
 
     Args:
         constructor: the function which applies quantization to the AWQObservedLinear layer
     """
+
     def insert_subclass(observed_linear):
         # creates the new linear layer using constructor
-        linear = torch.nn.Linear(observed_linear.in_features, observed_linear.out_features, observed_linear.bias != None, device=observed_linear.weight.device, dtype=observed_linear.weight.dtype)
-        linear.weight = torch.nn.Parameter(constructor(observed_linear), requires_grad=False)
+        linear = torch.nn.Linear(
+            observed_linear.in_features,
+            observed_linear.out_features,
+            observed_linear.bias != None,
+            device=observed_linear.weight.device,
+            dtype=observed_linear.weight.dtype,
+        )
+        linear.weight = torch.nn.Parameter(
+            constructor(observed_linear), requires_grad=False
+        )
         linear.bias = observed_linear.bias
         return linear
 
     return insert_subclass
-
 
-def awq_uintx(quant_dtype: torch.dtype = torch.uint4,
-              group_size: int = 64,
-              use_hqq: bool = False,):
+
+def awq_uintx(
+    quant_dtype: torch.dtype = torch.uint4,
+    group_size: int = 64,
+    use_hqq: bool = False,
+):
     """
     Quantizes linear layers when passed into quantize_()
 
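For orientation, this is how the observer-insertion half of the flow is typically driven during calibration — a hedged sketch, where `MyModel` and `calibration_batches` are illustrative placeholders, not part of this diff, and the import path follows torchao's prototype AWQ example:

```python
import torch
from torchao.prototype.awq import insert_awq_observer_

model = MyModel().eval()  # hypothetical model containing torch.nn.Linear layers

# Swap every nn.Linear for an AWQObservedLinear that records activation stats.
insert_awq_observer_(
    model,
    n_validation_examples=10,
    validation_sequence_len=512,
    quant_dtype=torch.uint4,
    scale_search_space_size=20,
    group_size=128,
)

# Run calibration data through the model so each observer sees real activations.
with torch.no_grad():
    for batch in calibration_batches:  # hypothetical iterable of input tensors
        model(batch)
```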
@@ -95,8 +121,10 @@ def awq_uintx(quant_dtype: torch.dtype = torch.uint4,
         group_size: Quantization granularity. Use -1 for channel wise quantization
         weight_quant_fn: The quantization function to be used, which takes in the weight and returns the quantized weight. If None, then affine uint4 quantization is used
     """
-    assert quant_dtype in _DTYPE_TO_BIT_WIDTH or quant_dtype == torch.uint8, "Invalid quant_dtype. Please use torch.uint1 .. torch.uint8"
-
+    assert (
+        quant_dtype in _DTYPE_TO_BIT_WIDTH or quant_dtype == torch.uint8
+    ), "Invalid quant_dtype. Please use torch.uint1 .. torch.uint8"
+
     def weight_quant_func(observed_linear):
         equalization_scale = observed_linear.act_obs.calculate_qparams()
         # AQT config
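The key AWQ step is visible at the top of `weight_quant_func`: the weight is quantized after being multiplied by a per-input-channel `equalization_scale`, and the same scale is later attached as activation metadata so inference divides activations by it. The pair is algebraically a no-op on the linear output; only the rounding error moves onto less activation-sensitive channels. A minimal sketch of that identity with plain tensors (no torchao types):

```python
import torch

x = torch.randn(4, 8)             # activations (batch, in_features)
w = torch.randn(16, 8)            # linear weight (out_features, in_features)
s = torch.rand(8) + 0.5           # per-input-channel equalization scale

y = x @ w.t()
y_scaled = (x / s) @ (w * s).t()  # scale weights up, activations down
assert torch.allclose(y, y_scaled, atol=1e-4)
```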
@@ -114,24 +142,28 @@ def weight_quant_func(observed_linear):
         zero_point_dtype = torch.int64
         zero_point_domain = ZeroPointDomain.INT
         _layout = UintxLayout(quant_dtype)
-
+
         mapping_type = MappingType.ASYMMETRIC
         block_size = (1, group_size)
         quant_min = _DTYPE_TO_QVALUE_BOUNDS[quant_dtype][0]
         quant_max = _DTYPE_TO_QVALUE_BOUNDS[quant_dtype][1]
         qw = to_affine_quantized_intx(
             observed_linear.weight * equalization_scale,
             mapping_type,
-            block_size,
-            target_dtype, quant_min,
-            quant_max, eps,
+            block_size,
+            target_dtype,
+            quant_min,
+            quant_max,
+            eps,
             zero_point_dtype=zero_point_dtype,
             preserve_zero=preserve_zero,
             zero_point_domain=zero_point_domain,
             _layout=_layout,
-            use_hqq=use_hqq
+            use_hqq=use_hqq,
         )
-
-        return to_weight_tensor_with_linear_activation_scale_metadata(qw, equalization_scale)
-
+
+        return to_weight_tensor_with_linear_activation_scale_metadata(
+            qw, equalization_scale
+        )
+
     return _observed_linear_subclass_inserter(weight_quant_func)
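This formatting pass leaves the end-to-end API unchanged. Continuing the calibration sketch above, the second half of the flow applies `awq_uintx` through `quantize_` with a filter so that only the observed linears (which carry calibration state) are converted — a hedged sketch, assuming the exports shown in torchao's prototype AWQ example:

```python
import torch
from torchao.prototype.awq import AWQObservedLinear, awq_uintx
from torchao.quantization import quantize_

# Restrict quantize_ to the AWQObservedLinear modules inserted earlier,
# rather than the default nn.Linear filter.
is_observed_linear = lambda m, fqn: isinstance(m, AWQObservedLinear)

quantize_(
    model,  # the calibrated model from the sketch above
    awq_uintx(quant_dtype=torch.uint4, group_size=64),
    is_observed_linear,
)
```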