
Commit 1110405

Merge branch 'main' into skipROCmTest

2 parents: 88448d2 + ea7910e

22 files changed: 285 additions, 299 deletions

.github/workflows/float8_test.yml

+3

@@ -29,6 +29,9 @@ jobs:
             gpu-arch-type: "cuda"
             gpu-arch-version: "12.1"
 
+    permissions:
+      id-token: write
+      contents: read
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       timeout: 60

.github/workflows/nightly_smoke_test.yml

+4 -2

@@ -11,7 +11,7 @@ concurrency:
   cancel-in-progress: true
 
 env:
-  HF_TOKEN: ${{ secrets.HF_TOKEN }}
+  HF_TOKEN: ${{ secrets.HF_TOKEN }}
 
 jobs:
   test:
@@ -25,7 +25,9 @@ jobs:
             gpu-arch-type: "cuda"
             gpu-arch-version: "12.1"
 
-
+    permissions:
+      id-token: write
+      contents: read
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: ${{ matrix.runs-on }}

.github/workflows/regression_test.yml

+6 -10

@@ -17,10 +17,6 @@ concurrency:
 env:
   HF_TOKEN: ${{ secrets.HF_TOKEN }}
 
-permissions:
-  id-token: write
-  contents: read
-
 jobs:
   test-nightly:
     strategy:
@@ -37,16 +33,13 @@ jobs:
             torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cpu'
             gpu-arch-type: "cpu"
            gpu-arch-version: ""
-          - name: ROCM Nightly
-            runs-on: linux.rocm.gpu.2
-            torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/rocm6.3'
-            gpu-arch-type: "rocm"
-            gpu-arch-version: "6.3"
 
+    permissions:
+      id-token: write
+      contents: read
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       timeout: 120
-      no-sudo: ${{ matrix.gpu-arch-type == 'rocm' }}
       runner: ${{ matrix.runs-on }}
       gpu-arch-type: ${{ matrix.gpu-arch-type }}
       gpu-arch-version: ${{ matrix.gpu-arch-version }}
@@ -81,6 +74,7 @@ jobs:
             torch-spec: 'torch==2.5.1 --index-url https://download.pytorch.org/whl/cu121'
             gpu-arch-type: "cuda"
             gpu-arch-version: "12.1"
+
          - name: CPU 2.3
             runs-on: linux.4xlarge
             torch-spec: 'torch==2.3.0 --index-url https://download.pytorch.org/whl/cpu'
@@ -108,6 +102,8 @@ jobs:
           conda create -n venv python=3.9 -y
           conda activate venv
           echo "::group::Install newer objcopy that supports --set-section-alignment"
+          yum install -y devtoolset-10-binutils
+          export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
           python -m pip install --upgrade pip
           pip install ${{ matrix.torch-spec }}
           pip install -r dev-requirements.txt

test/__init__.py

Whitespace-only changes.

test/dtypes/test_affine_quantized.py

-3

@@ -90,7 +90,6 @@ def test_tensor_core_layout_transpose(self):
         aqt_shape = aqt.shape
         self.assertEqual(aqt_shape, shape)
 
-    @skip_if_rocm("ROCm development in progress")
     @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
     @common_utils.parametrize(
         "apply_quant", get_quantization_functions(True, True, "cuda", True)
@@ -170,7 +169,6 @@ def apply_uint6_weight_only_quant(linear):
 
         deregister_aqt_quantized_linear_dispatch(dispatch_condition)
 
-    @skip_if_rocm("ROCm development in progress")
     @common_utils.parametrize("apply_quant", get_quantization_functions(True, True))
     @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
     def test_print_quantized_module(self, apply_quant):
@@ -183,7 +181,6 @@ class TestAffineQuantizedBasic(TestCase):
     COMMON_DEVICES = ["cpu"] + (["cuda"] if torch.cuda.is_available() else [])
     COMMON_DTYPES = [torch.bfloat16]
 
-    @skip_if_rocm("ROCm development in progress")
     @common_utils.parametrize("device", COMMON_DEVICES)
     @common_utils.parametrize("dtype", COMMON_DTYPES)
     def test_flatten_unflatten(self, device, dtype):

test/dtypes/test_floatx.py

-1

@@ -108,7 +108,6 @@ def test_to_copy_device(self, ebits, mbits):
     @parametrize("ebits,mbits", _Floatx_DTYPES)
     @parametrize("bias", [False, True])
     @parametrize("dtype", [torch.half, torch.bfloat16])
-    @skip_if_rocm("ROCm development in progress")
     @unittest.skipIf(is_fbcode(), reason="broken in fbcode")
     def test_fpx_weight_only(self, ebits, mbits, bias, dtype):
         N, OC, IC = 4, 256, 64

test/float8/test_base.py

-1

@@ -424,7 +424,6 @@ def test_linear_from_config_params(
     @pytest.mark.parametrize("x_shape", [(16, 16), (2, 16, 16), (3, 2, 16, 16)])
     @pytest.mark.parametrize("linear_bias", [True, False])
     @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
-    @skip_if_rocm("ROCm development in progress")
     def test_linear_from_recipe(
         self,
         recipe_name,

test/hqq/test_hqq_affine.py

-1

@@ -111,7 +111,6 @@ def test_hqq_plain_5bit(self):
             ref_dot_product_error=0.000704,
         )
 
-    @skip_if_rocm("ROCm development in progress")
     def test_hqq_plain_4bit(self):
         self._test_hqq(
             dtype=torch.uint4,

test/integration/test_integration.py

+1 -6

@@ -570,7 +570,6 @@ def test_per_token_linear_cpu(self):
             self._test_per_token_linear_impl("cpu", dtype)
 
     @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
-    @skip_if_rocm("ROCm development in progress")
     def test_per_token_linear_cuda(self):
         for dtype in (torch.float32, torch.float16, torch.bfloat16):
             self._test_per_token_linear_impl("cuda", dtype)
@@ -689,7 +688,6 @@ def test_dequantize_int8_weight_only_quant_subclass(self, device, dtype):
     @parameterized.expand(COMMON_DEVICE_DTYPE)
     @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_3, "int4 requires torch nightly.")
     # @unittest.skipIf(TORCH_VERSION_AT_LEAST_2_5, "int4 skipping 2.5+ for now")
-    @skip_if_rocm("ROCm development in progress")
     def test_dequantize_int4_weight_only_quant_subclass(self, device, dtype):
         if device == "cpu":
             self.skipTest(f"Temporarily skipping for {device}")
@@ -709,7 +707,6 @@ def test_dequantize_int4_weight_only_quant_subclass(self, device, dtype):
     @parameterized.expand(COMMON_DEVICE_DTYPE)
     @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_3, "int4 requires torch nightly.")
     # @unittest.skipIf(TORCH_VERSION_AT_LEAST_2_5, "int4 skipping 2.5+ for now")
-    @skip_if_rocm("ROCm development in progress")
     def test_dequantize_int4_weight_only_quant_subclass_grouped(self, device, dtype):
         if device == "cpu":
             self.skipTest(f"Temporarily skipping for {device}")
@@ -903,7 +900,6 @@ def test_aq_float8_dynamic_quant_tensorwise_scaling_subclass(self, device, dtype
     @parameterized.expand(COMMON_DEVICE_DTYPE)
     @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_3, "int4 requires torch nightly.")
     # @unittest.skipIf(TORCH_VERSION_AT_LEAST_2_5, "int4 skipping 2.5+ for now")
-    @skip_if_rocm("ROCm development in progress")
     def test_int4_weight_only_quant_subclass(self, device, dtype):
         if device == "cpu":
             self.skipTest(f"Temporarily skipping for {device}")
@@ -923,7 +919,6 @@ def test_int4_weight_only_quant_subclass(self, device, dtype):
     @parameterized.expand(COMMON_DEVICE_DTYPE)
     @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_3, "int4 requires torch nightly.")
     # @unittest.skipIf(TORCH_VERSION_AT_LEAST_2_5, "int4 skipping 2.5+ for now")
-    @skip_if_rocm("ROCm development in progress")
     def test_int4_weight_only_quant_subclass_grouped(self, device, dtype):
         if dtype != torch.bfloat16:
             self.skipTest(f"Fails for {dtype}")
@@ -1827,7 +1822,7 @@ def test_autoquant_int4wo(self, device, dtype):
         self.assertGreater(compute_error(ref, out), 20)
 
     @parameterized.expand(COMMON_DEVICE_DTYPE)
-    @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
+    @unittest.skipIf(not is_sm_at_least_90(), "Need cuda arch greater than SM90")
     @unittest.skipIf(
         not TORCH_VERSION_AT_LEAST_2_5, "autoquant int4 option requires 2.5+."
     )
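The last hunk above swaps a plain CUDA-availability gate for an is_sm_at_least_90() check. The helper's implementation is not part of this diff; a minimal sketch of what such a guard could look like, assuming it is exposed by torchao.utils and built on torch.cuda.get_device_capability(), is:

import torch


def is_sm_at_least_90() -> bool:
    # Hypothetical sketch; the real helper used by test_integration.py is not shown in this diff.
    # Returns True only when a CUDA device with compute capability >= 9.0 (SM90, Hopper) is present.
    if not torch.cuda.is_available():
        return False
    return torch.cuda.get_device_capability() >= (9, 0)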

test/kernel/test_galore_downproj.py

-1

@@ -30,7 +30,6 @@
 
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="requires GPU")
 @pytest.mark.parametrize("M, N, rank, allow_tf32, fp8_fast_accum, dtype", TEST_CONFIGS)
-@skip_if_rocm("ROCm development in progress")
 def test_galore_downproj(M, N, rank, allow_tf32, fp8_fast_accum, dtype):
     torch.backends.cuda.matmul.allow_tf32 = allow_tf32
     MAX_DIFF = MAX_DIFF_tf32 if allow_tf32 else MAX_DIFF_no_tf32

test/prototype/test_awq.py

-1

@@ -117,7 +117,6 @@ def test_awq_loading(device, qdtype):
 
 @pytest.mark.skipif(not TORCH_VERSION_AT_LEAST_2_5, reason="requires nightly pytorch")
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
-@skip_if_rocm("ROCm development in progress")
 def test_save_weights_only():
     dataset_size = 100
     l1, l2, l3 = 512, 256, 128

test/prototype/test_low_bit_optim.py

-1

@@ -113,7 +113,6 @@ class TestOptim(TestCase):
     )
     @parametrize("dtype", [torch.float32, torch.bfloat16])
     @parametrize("device", _DEVICES)
-    @skip_if_rocm("ROCm development in progress")
     def test_optim_smoke(self, optim_name, dtype, device):
         if optim_name.endswith("Fp8") and device == "cuda":
             if not TORCH_VERSION_AT_LEAST_2_4:

test/prototype/test_splitk.py

+2 -1

@@ -13,14 +13,15 @@
 except ImportError:
     triton_available = False
 
+
 from torchao.utils import skip_if_compute_capability_less_than, skip_if_rocm
 
 
+
 @unittest.skipIf(not triton_available, "Triton is required but not available")
 @unittest.skipIf(not torch.cuda.is_available(), "CUDA is required")
 class TestFP8Gemm(TestCase):
     @skip_if_compute_capability_less_than(9.0)
-    @skip_if_rocm("ROCm development in progress")
     def test_gemm_split_k(self):
         dtype = torch.float16
         qdtype = torch.float8_e4m3fn

test/quantization/test_galore_quant.py

-1

@@ -83,7 +83,6 @@ def test_galore_quantize_blockwise(dim1, dim2, dtype, signed, blocksize):
     "dim1,dim2,dtype,signed,blocksize",
     TEST_CONFIGS,
 )
-@skip_if_rocm("ROCm development in progress")
 def test_galore_dequant_blockwise(dim1, dim2, dtype, signed, blocksize):
     g = torch.randn(dim1, dim2, device="cuda", dtype=dtype) * 0.01

test/quantization/test_marlin_qqq.py

-2

@@ -45,7 +45,6 @@ def setUp(self):
         )
 
     @pytest.mark.skipif(not torch.cuda.is_available(), reason="Need CUDA available")
-    @skip_if_rocm("ROCm development in progress")
     def test_marlin_qqq(self):
         output_ref = self.model(self.input)
         for group_size in [-1, 128]:
@@ -67,7 +66,6 @@ def test_marlin_qqq(self):
 
     @pytest.mark.skipif(not TORCH_VERSION_AT_LEAST_2_5, reason="Needs PyTorch 2.5+")
     @pytest.mark.skipif(not torch.cuda.is_available(), reason="Need CUDA available")
-    @skip_if_rocm("ROCm development in progress")
     def test_marlin_qqq_compile(self):
         model_copy = copy.deepcopy(self.model)
         model_copy.forward = torch.compile(model_copy.forward, fullgraph=True)

test/sparsity/test_marlin.py

+1 -2

@@ -37,7 +37,6 @@ def setUp(self):
         )
 
     @pytest.mark.skipif(not torch.cuda.is_available(), reason="Need CUDA available")
-    @skip_if_rocm("ROCm development in progress")
     def test_quant_sparse_marlin_layout_eager(self):
         apply_fake_sparsity(self.model)
         model_copy = copy.deepcopy(self.model)
@@ -49,13 +48,13 @@ def test_quant_sparse_marlin_layout_eager(self):
         # Sparse + quantized
         quantize_(self.model, int4_weight_only(layout=MarlinSparseLayout()))
         sparse_result = self.model(self.input)
+
         assert torch.allclose(
             dense_result, sparse_result, atol=3e-1
         ), "Results are not close"
 
     @pytest.mark.skipif(not TORCH_VERSION_AT_LEAST_2_5, reason="Needs PyTorch 2.5+")
     @pytest.mark.skipif(not torch.cuda.is_available(), reason="Need CUDA available")
-    @skip_if_rocm("ROCm development in progress")
     def test_quant_sparse_marlin_layout_compile(self):
         apply_fake_sparsity(self.model)
         model_copy = copy.deepcopy(self.model)

test/test_ops.py

-3

@@ -19,9 +19,6 @@
 from torchao.sparsity.marlin import inject_24, marlin_24_workspace, pack_to_marlin_24
 from torchao.utils import TORCH_VERSION_AT_LEAST_2_5, compute_max_diff, is_fbcode
 
-if torch.version.hip is not None:
-    pytest.skip("Skipping the test in ROCm", allow_module_level=True)
-
 if is_fbcode():
     pytest.skip(
         "Skipping the test in fbcode since we don't have TARGET file for kernels"

test/test_s8s4_linear_cutlass.py

-3

@@ -7,9 +7,6 @@
 from torchao.quantization.utils import group_quantize_tensor_symmetric
 from torchao.utils import compute_max_diff
 
-if torch.version.hip is not None:
-    pytest.skip("Skipping the test in ROCm", allow_module_level=True)
-
 S8S4_LINEAR_CUTLASS_DTYPE = [torch.float16, torch.bfloat16]
 S8S4_LINEAR_CUTLASS_BATCH_SIZE = [1, 4, 8, 16, 32, 64]
 S8S4_LINEAR_CUTLASS_SIZE_MNK = [

test/test_utils.py

-29

@@ -1,40 +1,11 @@
-import functools
 import unittest
 from unittest.mock import patch
 
-import pytest
 import torch
 
 from torchao.utils import TorchAOBaseTensor, torch_version_at_least
 
 
-def skip_if_rocm(message=None):
-    """Decorator to skip tests on ROCm platform with custom message.
-
-    Args:
-        message (str, optional): Additional information about why the test is skipped.
-    """
-
-    def decorator(func):
-        @functools.wraps(func)
-        def wrapper(*args, **kwargs):
-            if torch.version.hip is not None:
-                skip_message = "Skipping the test in ROCm"
-                if message:
-                    skip_message += f": {message}"
-                pytest.skip(skip_message)
-            return func(*args, **kwargs)
-
-        return wrapper
-
-    # Handle both @skip_if_rocm and @skip_if_rocm() syntax
-    if callable(message):
-        func = message
-        message = None
-        return decorator(func)
-    return decorator
-
-
 class TestTorchVersionAtLeast(unittest.TestCase):
     def test_torch_version_at_least(self):
         test_cases = [
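The skip_if_rocm helper deleted from this file matches the name that test/prototype/test_splitk.py still imports from torchao.utils, so the decorator remains available to the suite through that module. A minimal usage sketch, assuming the torchao.utils export behaves like the removed implementation above (supporting both the bare and the parenthesized form), is:

import torch
from torchao.utils import skip_if_rocm  # assumed export; mirrors the import kept in test_splitk.py


@skip_if_rocm("ROCm development in progress")
def test_cuda_only_path():
    # Skipped with the custom message when torch.version.hip is set (ROCm builds).
    assert torch.cuda.is_available()


@skip_if_rocm
def test_another_cuda_only_path():
    # The removed implementation also accepted the bare decorator form.
    assert torch.version.hip is None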
