@@ -54,36 +54,6 @@ def _linear_impl(input_tensor, weight_tensor, bias):
54
54
weight_tensor = weight_tensor .dequantize ()
55
55
return torch .nn .functional .linear (input_tensor , weight_tensor , bias )
56
56
57
- # assert isinstance(input_tensor, )
58
- # if isinstance(input_tensor, AffineQuantizedTensor):
59
-
60
- # input_tensor = input_tensor.dequantize()
61
- # if isinstance(weight_tensor, AffineQuantizedTensor):
62
- # weight_tensor = weight_tensor.dequantize()
63
- # return torch.nn.functional.linear(input_tensor, weight_tensor, bias)
64
-
65
- # x = per_token_dynamic_quant(input_tensor)
66
-
67
- # w_int_data = weight_tensor.tensor_impl.int_data
68
- # w_scale = weight_tensor.tensor_impl.scale
69
- # w_zero_point = weight_tensor.tensor_impl.zero_point
70
- # assert len(weight_tensor.block_size) == 2
71
- # assert weight_tensor.block_size[0] == 1
72
- # group_size = weight_tensor.block_size[1]
73
-
74
- # w_dq = _quantized_decomposed_dequantize_per_channel_group_wrapper(
75
- # w_int_data,
76
- # w_scale,
77
- # w_zero_point,
78
- # weight_tensor.quant_min,
79
- # weight_tensor.quant_max,
80
- # torch.int8,
81
- # group_size,
82
- # torch.float32,
83
- # )
84
-
85
- # return torch.nn.functional.linear(x, w_dq, bias)
86
-
87
57
88
58
register_aqt_quantized_linear_dispatch (
89
59
_linear_check ,
0 commit comments