diff --git a/backends/arm/_passes/decompose_div_tensor_mode.py b/backends/arm/_passes/decompose_div_tensor_mode.py index cc5440b4e5b..e0e0c219135 100644 --- a/backends/arm/_passes/decompose_div_tensor_mode.py +++ b/backends/arm/_passes/decompose_div_tensor_mode.py @@ -4,23 +4,33 @@ # LICENSE file in the root directory of this source tree. -from typing import Set, Type +from typing import cast, Literal, Set, Type import torch from executorch.backends.arm._passes.arm_pass import ArmOpTargetedPass from executorch.backends.arm._passes.decompose_div_pass import DecomposeDivPass +from executorch.backends.arm.tosa.specification import get_context_spec from executorch.exir.dialects._ops import ops as exir_ops from executorch.exir.pass_base import ExportPass edge_div_mode_ops = (exir_ops.edge.aten.div.Tensor_mode,) aten_div_mode_ops = (torch.ops.aten.div.Tensor_mode,) +RoundingMode = Literal["trunc", "floor"] edge_unary = { "div": exir_ops.edge.aten.div.Tensor, "floor": exir_ops.edge.aten.floor.default, "ceil": exir_ops.edge.aten.ceil.default, + "eq": exir_ops.edge.aten.eq.Tensor, "full": exir_ops.edge.aten.full.default, "gt": exir_ops.edge.aten.gt.Tensor, + "logical_and": exir_ops.edge.aten.logical_and.default, + "logical_not": exir_ops.edge.aten.logical_not.default, + "logical_xor": exir_ops.edge.aten.logical_xor.default, + "intdiv": exir_ops.backend.tosa.INTDIV.default, + "mul": exir_ops.edge.aten.mul.Tensor, + "sub": exir_ops.edge.aten.sub.Tensor, + "to": exir_ops.edge.dim_order_ops._to_dim_order_copy.default, "where": exir_ops.edge.aten.where.self, } @@ -28,8 +38,15 @@ "div": torch.ops.aten.div.Tensor, "floor": torch.ops.aten.floor.default, "ceil": torch.ops.aten.ceil.default, + "eq": torch.ops.aten.eq.Tensor, "full": torch.ops.aten.full.default, "gt": torch.ops.aten.gt.Tensor, + "logical_and": torch.ops.aten.logical_and.default, + "logical_not": torch.ops.aten.logical_not.default, + "logical_xor": torch.ops.aten.logical_xor.default, + "mul": torch.ops.aten.mul.Tensor, 
+ "sub": torch.ops.aten.sub.Tensor, + "to": torch.ops.aten.to.dtype, "where": torch.ops.aten.where.self, } @@ -43,9 +60,9 @@ def _get_opset(op): class DecomposeDivTensorModePass(ArmOpTargetedPass): - """Rewrites aten.div.Tensor_mode into. + """Rewrites aten.div.Tensor_mode into supported arithmetic ops. - Example: + Floating-point flow: rounding_mode=None -> div(a, b) rounding_mode="floor" -> floor(div(a, b)) rounding_mode="trunc" -> where( @@ -54,12 +71,159 @@ class DecomposeDivTensorModePass(ArmOpTargetedPass): floor(div(a, b)), ) + Integer flow: + During transform-for-annotation, keep div.Tensor_mode intact, don't quantize it. + During backend lowering, rewrite the div to a TOSA INTDIV (corresponding to trunc rounding_mode) + + correcting factor for floor mode. + """ _passes_required_after: Set[Type[ExportPass]] = {DecomposeDivPass} target_ops = edge_div_mode_ops + aten_div_mode_ops check_allowed_to_transform = True + def _is_integer_tensor(self, arg) -> bool: + data = getattr(arg, "data", None) + if data is not None: + return arg.data.dtype in { + torch.uint8, + torch.int8, + torch.int16, + torch.int32, + torch.int64, + } + return isinstance(arg, int) + + def _cast(self, opset, arg, dtype: torch.dtype, meta): + if isinstance(arg, int): + if dtype.is_floating_point: + return float(arg) + else: + return arg + if isinstance(arg, float): + if dtype.is_floating_point: + return arg + else: + return int(arg) + data = getattr(arg, "data", None) + if data is not None and data.dtype == dtype: + return arg + return super().call_operator( + opset["to"], + (arg,), + {"dtype": dtype}, + meta, + updated=True, + ) + + def _full(self, opset, value, dtype: torch.dtype, meta): + return super().call_operator( + opset["full"], + args=((1,) * len(meta["val"].size()), value), + kwargs={"dtype": dtype, "device": meta["val"].device}, + meta=meta, + updated=True, + ) + + def _correct_intdiv_floor( + self, opset, numerator, denominator, trunced_quotient, meta + ): + """Apply a correcting 
+ factor for converting the truncated division to + floored division. + + Done by subtracting one from the result when, elementwise, + - The remainder is nonzero (otherwise the division is even and the rounding trivial) + - The numerator and denominator have different signs (causing a negative quotient) + The sign of the quotient can't be checked directly: there are cases when it is 0 and still needs correction. + + """ + # Condition 1: non-zero remainder + product = super().call_operator( + opset["mul"], (trunced_quotient, denominator), {}, meta, updated=True + ) + remainder = super().call_operator( + opset["sub"], (numerator, product), {}, meta, updated=True + ) + zero = self._full(opset, 0, torch.int32, meta) + remainder_is_zero = super().call_operator( + opset["eq"], (remainder, zero), {}, meta, updated=True + ) + remainder_is_nonzero = super().call_operator( + opset["logical_not"], (remainder_is_zero,), {}, meta, updated=True + ) + # Condition 2: un-rounded quotient is negative + a_is_negative = super().call_operator( + opset["gt"], (zero, numerator), {}, meta, updated=True + ) + b_is_negative = super().call_operator( + opset["gt"], (zero, denominator), {}, meta, updated=True + ) + signs_differ = super().call_operator( + opset["logical_xor"], + (a_is_negative, b_is_negative), + {}, + meta, + updated=True, + ) + # Use conditions to correct quotient. + needs_correction = super().call_operator( + opset["logical_and"], + (remainder_is_nonzero, signs_differ), + {}, + meta, + updated=True, + ) + # (TOSA spec enforces that int(bool_var) == (bool_var ? 1 : 0)) + correction = self._cast(opset, needs_correction, torch.int32, meta) + return super().call_operator( + opset["sub"], (trunced_quotient, correction), {}, meta, updated=True + ) + + def _call_integer_div(self, opset, a, b, rounding_mode: RoundingMode, meta): + """Cast inputs to int32, do TOSA INTDIV, and apply correcting factor for + floor rounding mode.
+ """ + + a_int32 = self._cast(opset, a, torch.int32, meta) + b_int32 = self._cast(opset, b, torch.int32, meta) + intdiv = super().call_operator( + opset["intdiv"], + (a_int32, b_int32), + {}, + meta, + updated=True, + ) + if rounding_mode == "floor": + intdiv = self._correct_intdiv_floor(opset, a_int32, b_int32, intdiv, meta) + + output_dtype = meta["val"].dtype + return self._cast(opset, intdiv, output_dtype, meta) + + def _call_fp_div(self, opset, a, b, rounding_mode: RoundingMode | None, meta): + q = super().call_operator(opset["div"], (a, b), {}, meta, updated=True) + + match rounding_mode: + case None: + return q + case "floor": + return super().call_operator( + opset["floor"], (q,), {}, meta, updated=True + ) + case "trunc": + zero = self._full(opset, 0.0, torch.float32, meta) + is_neg = super().call_operator( + opset["gt"], (zero, q), {}, meta, updated=True + ) + ceilq = super().call_operator( + opset["ceil"], (q,), {}, meta, updated=True + ) + floorq = super().call_operator( + opset["floor"], (q,), {}, meta, updated=True + ) + return super().call_operator( + opset["where"], (is_neg, ceilq, floorq), {}, meta, updated=True + ) + def call_operator(self, op, args, kwargs, meta): if op not in self.target_ops or not self.allowed_to_transform(meta): return super().call_operator(op, args, kwargs, meta) @@ -67,35 +231,53 @@ def call_operator(self, op, args, kwargs, meta): opset = _get_opset(op) a, b = args[0], args[1] + a_is_int = self._is_integer_tensor(a) + b_is_int = self._is_integer_tensor(b) rounding_mode = kwargs.get("rounding_mode", None) if rounding_mode is None and len(args) > 2: rounding_mode = args[2] + if rounding_mode not in ("floor", "trunc", None): + raise RuntimeError( + "Integer div.Tensor_mode requires rounding_mode floor, trunc, or None." 
+ f" Got {rounding_mode!r}" + ) + rounding_mode = cast(RoundingMode | None, rounding_mode) - q = super().call_operator(opset["div"], (a, b), {}, meta, updated=True) + int_operation = rounding_mode is not None and a_is_int and b_is_int + sufficient_int_support = ( + rounding_mode == "trunc" or get_context_spec().support_integer() + ) + sufficient_int_support &= not get_context_spec().is_U55_subset - if rounding_mode is None: - return q + if int_operation and sufficient_int_support: + """Integer operation and necessary int ops supported -> pure integer + path. + """ + if self.is_tfa_pass: + # No quantization necessary, so don't do anything in TFA. + return super().call_operator(op, args, kwargs, meta) + return self._call_integer_div(opset, a, b, rounding_mode, meta) + else: + """Otherwise floating point operation -> do fp path. - if rounding_mode == "floor": - return super().call_operator(opset["floor"], (q,), {}, meta, updated=True) - - if rounding_mode == "trunc": - zero = super().call_operator( - opset["full"], - args=((1,) * len(meta["val"].size()), 0.0), - kwargs={"dtype": torch.float32, "device": meta["val"].device}, - meta=meta, - updated=True, - ) - is_neg = super().call_operator( - opset["gt"], (zero, q), {}, meta, updated=True - ) - ceilq = super().call_operator(opset["ceil"], (q,), {}, meta, updated=True) - floorq = super().call_operator(opset["floor"], (q,), {}, meta, updated=True) - return super().call_operator( - opset["where"], (is_neg, ceilq, floorq), {}, meta, updated=True + Cast to and from fp if necessary.
+ + """ + if a_is_int: + a = self._cast(opset, a, torch.float32, meta) + if b_is_int: + b = self._cast(opset, b, torch.float32, meta) + + result = self._call_fp_div( + opset, + a, + b, + rounding_mode, + meta, ) - raise RuntimeError( - f"Unsupported rounding_mode for div.Tensor_mode: {rounding_mode!r}" - ) + output_dtype = meta["val"].dtype + if output_dtype != torch.float32: + result = self._cast(opset, result, output_dtype, meta) + + return result diff --git a/backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py b/backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py index 09e90b88e36..713d6ef354a 100644 --- a/backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py +++ b/backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py @@ -305,6 +305,25 @@ def is_foldable(node: Node) -> bool: return False return True + @staticmethod + def _correct_output_dtype(node: torch.fx.Node): + if node.target not in { + exir_ops.edge.aten.sum.dim_IntList, + exir_ops.edge.dim_order_ops._to_dim_order_copy.default, + }: + return + if len(node.meta["output_qparams"]) == 0: + return + output_qparams = cast(QuantArgs, node.meta["output_qparams"][0]) + + if node.target == exir_ops.edge.dim_order_ops._to_dim_order_copy.default: + if output_qparams.scale != 1.0 or output_qparams.zp != 0.0: + raise ValueError( + f"Expected quantized {node.target} '{node.name}' to have unit scale and zero point." + ) + + set_node_arg(node, "dtype", output_qparams.dtype) + def call(self, graph_module: GraphModule) -> PassResult: # noqa: C901 # Loop over the graph nodes and find any node in the 'targeted_ops' list. @@ -355,13 +374,7 @@ def call(self, graph_module: GraphModule) -> PassResult: # noqa: C901 # Some op(s) contain a "dtype" key in their node kwargs. Set this # to the type of output qparams. 
- output_qparams = n.meta["output_qparams"] - if ( - n.target in {exir_ops.edge.aten.sum.dim_IntList} - and len(output_qparams) > 0 - ): - output_dtype = output_qparams[0].dtype - set_node_arg(n, "dtype", output_dtype) + FoldAndAnnotateQParamsPass._correct_output_dtype(n) if n.target in ( torch.ops.higher_order.cond, diff --git a/backends/arm/operator_support/to_dim_order_copy_support.py b/backends/arm/operator_support/to_dim_order_copy_support.py index a02a8e16276..b7062ebbb97 100644 --- a/backends/arm/operator_support/to_dim_order_copy_support.py +++ b/backends/arm/operator_support/to_dim_order_copy_support.py @@ -139,6 +139,20 @@ def _merge_supported_types( torch.float8_e5m2: [torch.bfloat16], } + @staticmethod + def _is_quantized_identity_cast(node: torch.fx.Node) -> bool: + for user in node.users: + if ( + not user.target + == exir_ops.edge.quantized_decomposed.quantize_per_tensor.default + ): + return False + scale = user.args[1] + zp = user.args[2] + if scale != 1.0 or zp != 0.0: + return False + return bool(node.users) + def is_node_tosa_supported( # noqa: C901 self, node: fx.Node, tosa_spec: TosaSpecification ) -> bool: @@ -228,6 +242,11 @@ def is_node_tosa_supported( # noqa: C901 ) return False if output_val.dtype not in supported_dtypes[input_dtype]: + if ( + tosa_spec.support_integer() + and ToCopySupported._is_quantized_identity_cast(node) + ): + return True self.reporter.report_reject( node, ( diff --git a/backends/arm/operator_support/tosa_profile_supported_op_lists.py b/backends/arm/operator_support/tosa_profile_supported_op_lists.py index dc448ba0d5f..4495ff90450 100644 --- a/backends/arm/operator_support/tosa_profile_supported_op_lists.py +++ b/backends/arm/operator_support/tosa_profile_supported_op_lists.py @@ -126,6 +126,7 @@ exir_ops.edge.aten.celu.default, exir_ops.edge.aten.bitwise_not.default, exir_ops.edge.aten.copy.default, + exir_ops.edge.aten.div.Tensor_mode, exir_ops.edge.aten.tan.default, exir_ops.edge.aten.silu.default,
exir_ops.edge.aten.detach_copy.default, diff --git a/backends/arm/quantizer/quantization_annotator.py b/backends/arm/quantizer/quantization_annotator.py index 13693bd235d..ad4c85c5030 100644 --- a/backends/arm/quantizer/quantization_annotator.py +++ b/backends/arm/quantizer/quantization_annotator.py @@ -956,6 +956,27 @@ def any_or_hardtanh_min_zero(n: Node): shared_qspec = SharedQuantizationSpec(input_node) quant_properties.quant_inputs = [_QuantProperty(0, shared_qspec)] quant_properties.quant_output = _QuantProperty(0, shared_qspec) + elif node.target == torch.ops.aten.to.dtype: + # If we quantize a cast(fp32) with unit scale and same dtype as input, we can handle it as a no-op in the backend. + input_val = node.all_input_nodes[0].meta.get("val", None) + if input_val is None: + return None + + if input_val.dtype not in (torch.int8, torch.int16, torch.int32): + return None + + quant_properties.quant_output = _QuantProperty( + 0, + FixedQParamsQuantizationSpec( + dtype=input_val.dtype, + scale=1.0, + zero_point=0, + quant_max=torch.iinfo(input_val.dtype).max, + quant_min=torch.iinfo(input_val.dtype).min, + qscheme=torch.per_tensor_symmetric, + is_dynamic=False, + ), + ) elif node.target in ( torch.ops.higher_order.cond, torch.ops.higher_order.while_loop, diff --git a/backends/arm/test/ops/test_div_tensor_mode.py b/backends/arm/test/ops/test_div_tensor_mode.py index d9d058fccc6..9b9b95650b0 100644 --- a/backends/arm/test/ops/test_div_tensor_mode.py +++ b/backends/arm/test/ops/test_div_tensor_mode.py @@ -35,6 +35,18 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: return torch.div(x, y, rounding_mode=self.mode) +def _is_integer_rounded_div(mode, inputs) -> bool: + if mode is None: + return False + for input in inputs: + if isinstance(input, torch.Tensor): + if input.dtype.is_floating_point: + return False + elif not isinstance(input, int): + return False + return True + + test_data = { "mode_none": lambda: (None, (torch.randn(4, 8),
torch.randn(4, 8).abs() + 1e-3)), "mode_floor": lambda: ( @@ -46,6 +58,48 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: (torch.randn(4, 8), torch.randn(4, 8).abs() + 1e-3), ), "int_denominator": lambda: (None, (torch.randn(4, 8), 2)), + "int8_floor": lambda: ( + "floor", + ( + (torch.randn(4, 8) * 100).to(dtype=torch.int8), + (torch.rand(4, 8) * 100 + 10).to(dtype=torch.int8), + ), + ), + "int8_int_scalar": lambda: ( + "floor", + ( + (torch.randn(4, 8) * 100).to(dtype=torch.int8), + 9, + ), + ), + "int8_float_scalar": lambda: ( + "floor", + ( + (torch.randn(4, 8) * 100).to(dtype=torch.int8), + 9.5, + ), + ), + "int16_trunc": lambda: ( + "trunc", + ( + (torch.randn(4, 8) * 100).to(dtype=torch.int8), + (torch.rand(4, 8) * 100 + 10).to(dtype=torch.int16), + ), + ), + "int32_floor": lambda: ( + "floor", + ( + (torch.randn(4, 8) * 100).to(dtype=torch.int32), + (torch.rand(4, 8) * 100 + 10).to(dtype=torch.int32), + ), + ), + "int32_trunc": lambda: ( + "trunc", + ( + (torch.randn(4, 8) * 100).to(dtype=torch.int32), + (torch.rand(4, 8) * 100 + 10).to(dtype=torch.int32), + ), + ), } @@ -61,7 +115,6 @@ def test_div_tensor_mode_tosa_FP(data): exir_op=[], use_to_edge_transform_and_lower=True, ) - pipeline.pop_stage("check_count.exir") pipeline.run() @@ -73,17 +126,22 @@ def test_div_tensor_mode_tosa_INT(data): pipeline = TosaPipelineINT[input_tt]( model, inputs, - aten_op=model.aten_ops_int, + aten_op=[], exir_op=[], use_to_edge_transform_and_lower=True, ) - pipeline.pop_stage("check_count.exir") pipeline.run() @common.XfailIfNoCorstone300 @common.parametrize( - "data", test_data, xfails={"mode_trunc": "CPU op missing in unittests"} + "data", + test_data, + xfails={ + "mode_trunc": "CPU op missing in unittests", + "int16_trunc": "CPU op missing in unittests", + "int32_trunc": "CPU op missing in unittests", + }, ) def test_div_tensor_mode_u55_INT(data): mode, inputs = data() @@ -92,10 +150,12 @@ def test_div_tensor_mode_u55_INT(data): pipeline = 
EthosU55PipelineINT[input_tt]( model, inputs, - aten_ops=model.aten_ops_int, + aten_ops=[], exir_ops=[], use_to_edge_transform_and_lower=True, ) + pipeline.tester.use_portable_ops = True + pipeline.pop_stage("check_count.exir") pipeline.run() @@ -108,7 +168,7 @@ def test_div_tensor_mode_u85_INT(data): pipeline = EthosU85PipelineINT[input_tt]( model, inputs, - aten_ops=model.aten_ops_int, + aten_ops=[], exir_ops=[], use_to_edge_transform_and_lower=True, ) @@ -124,12 +184,11 @@ def test_div_tensor_mode_vgf_quant(data): pipeline = VgfPipeline[input_tt]( model, inputs, - aten_op=model.aten_ops_int, + aten_op=[], exir_op=[], use_to_edge_transform_and_lower=True, quantize=True, ) - pipeline.pop_stage("check_count.exir") pipeline.run() diff --git a/backends/arm/test/ops/test_to_copy.py b/backends/arm/test/ops/test_to_copy.py index 6718fedea04..16f5ff0e36d 100644 --- a/backends/arm/test/ops/test_to_copy.py +++ b/backends/arm/test/ops/test_to_copy.py @@ -20,6 +20,7 @@ ) input_t1 = Tuple[torch.Tensor] # Input x +input_t2 = Tuple[torch.Tensor, torch.Tensor] # Input x, y class Cast(torch.nn.Module): @@ -40,6 +41,40 @@ def forward(self, x: torch.Tensor): return x.to(dtype=self.target_dtype) + x.to(dtype=self.target_dtype) +class CastAddTensor(torch.nn.Module): + def __init__(self, target_dtype): + super().__init__() + self.target_dtype = target_dtype + + def forward(self, x: torch.Tensor, y: torch.Tensor): + return x.to(dtype=self.target_dtype) + y + + +class AddModule(torch.nn.Module): + def forward(self, x: torch.Tensor, y: torch.Tensor): + return x + y + + +class CastToAddModule(torch.nn.Module): + def __init__(self, target_dtype): + super().__init__() + self.target_dtype = target_dtype + self.add = AddModule() + + def forward(self, x: torch.Tensor, y: torch.Tensor): + return self.add(x.to(dtype=self.target_dtype), y) + + +class CastCatTensor(torch.nn.Module): + def __init__(self, target_dtype, dim: int): + super().__init__() + self.target_dtype = target_dtype + self.dim = 
dim + + def forward(self, x: torch.Tensor, y: torch.Tensor): + return torch.cat((x.to(dtype=self.target_dtype), y), dim=self.dim) + + """ Tests the _to_copy operation. @@ -262,14 +297,6 @@ def test_to_vgf_no_quant(test_data: Tuple): in ToCopySupported::is_node_tosa_supported() before it goes into the delegated graph. """ _TO_COPY_TEST_DATA_INT = { - "rand_int8_fp32": lambda: ( - torch.randint(-127, 128, (1, 2, 3, 4), dtype=torch.int8), - torch.float32, - ), - "rand_int16_fp32": lambda: ( - torch.randint(-127, 128, (1, 2, 3, 4), dtype=torch.int16), - torch.float32, - ), "rand_int32_fp32": lambda: ( torch.randint(-127, 128, (1, 2, 3, 4), dtype=torch.int32), torch.float32, @@ -300,6 +327,95 @@ def test_to_tosa_INT_not_delegated(test_data: Tuple): pipeline.run() +_TO_COPY_QUANTIZED_IDENTITY_CAST_DATA = { + "int8_cast_add": lambda: ( + (torch.randn(1, 3, 4, 4) * 10).to(dtype=torch.int8), + torch.randn(1, 3, 4, 4), + torch.float32, + ), + "int16_cast_add": lambda: ( + (torch.randn(1, 3, 4, 4) * 10).to(dtype=torch.int16), + torch.randn(1, 3, 4, 4), + torch.float32, + ), + "int32_cast_add": lambda: ( + (torch.randn(1, 3, 4, 4) * 10).to(dtype=torch.int32), + torch.randn(1, 3, 4, 4), + torch.float32, + ), +} + + +_TO_COPY_QUANTIZED_IDENTITY_CAST_CAT_DATA = { + "int8_cast_cat": lambda: ( + (torch.randn(1, 2, 4, 4) * 10).to(dtype=torch.int8), + torch.randn(1, 2, 4, 1), + torch.float32, + 3, + ), + "int16_cast_cat": lambda: ( + (torch.randn(1, 2, 4, 4) * 10).to(dtype=torch.int16), + torch.randn(1, 2, 4, 1), + torch.float32, + 3, + ), +} + + +@common.parametrize("test_data", _TO_COPY_QUANTIZED_IDENTITY_CAST_DATA) +def test_to_tosa_INT_quantized_identity_cast_add(test_data: Tuple): + x, y, new_dtype = test_data() + pipeline = TosaPipelineINT[input_t2]( + CastAddTensor(new_dtype), + (x, y), + aten_op=["torch.ops.aten.add.Tensor"], + exir_op=["executorch_exir_dialects_edge__ops_aten_add_Tensor"], + qtol=1, + ) + pipeline.change_args( + "check_count.exir", + { + 
"torch.ops.higher_order.executorch_call_delegate": 1, + }, + ) + pipeline.run() + + +@common.parametrize("test_data", _TO_COPY_QUANTIZED_IDENTITY_CAST_CAT_DATA) +def test_to_tosa_INT_quantized_identity_cast_cat(test_data: Tuple): + x, y, new_dtype, dim = test_data() + pipeline = TosaPipelineINT[input_t2]( + CastCatTensor(new_dtype, dim), + (x, y), + aten_op=["torch.ops.aten.cat.default"], + exir_op=["executorch_exir_dialects_edge__ops_aten_cat_default"], + ) + pipeline.run() + + +@common.parametrize("test_data", _TO_COPY_QUANTIZED_IDENTITY_CAST_DATA) +def test_to_tosa_INT_quantized_identity_cast_to_unquantized_add_delegated( + test_data: Tuple, +): + x, y, new_dtype = test_data() + pipeline = TosaPipelineINT[input_t2]( + CastToAddModule(new_dtype), + (x, y), + aten_op=["torch.ops.aten.add.Tensor"], + exir_op=["executorch_exir_dialects_edge__ops_aten_add_Tensor"], + ) + pipeline.quantizer.set_module_name("add", None) + pipeline.pop_stage("check_not.exir") + pipeline.change_args( + "check_count.exir", + { + "torch.ops.higher_order.executorch_call_delegate": 1, + "executorch_exir_dialects_edge__ops_dim_order_ops__to_dim_order_copy_default": 0, + }, + ) + pipeline.run() + + @common.parametrize("test_data", _TO_COPY_TEST_DATA_INT) @common.SkipIfNoModelConverter def test_to_vgf_quant(test_data: Tuple): diff --git a/backends/arm/test/setup_testing.sh b/backends/arm/test/setup_testing.sh index 39d8335a26e..fcbdec25043 100755 --- a/backends/arm/test/setup_testing.sh +++ b/backends/arm/test/setup_testing.sh @@ -26,7 +26,7 @@ ${build_executor_runner} --pte=semihosting --target=ethos-u85-128 --system_confi # test setup to make sure models that are not fully delegated can still be tested and run OK # To use this you can set use_portable_ops=True when creating ArmTester() 
-portable_ops_list_u55="aten::permute_copy.out,aten::convolution.out,aten::relu.out,aten::_native_batch_norm_legit_no_training.out,aten::as_strided_copy.out,aten::mean.out,aten::squeeze_copy.dims,dim_order_ops::_clone_dim_order.out" +portable_ops_list_u55="aten::permute_copy.out,aten::convolution.out,aten::relu.out,aten::_native_batch_norm_legit_no_training.out,aten::as_strided_copy.out,aten::mean.out,aten::squeeze_copy.dims,dim_order_ops::_clone_dim_order.out,dim_order_ops::_to_dim_order_copy.out" portable_ops_list_u65="${portable_ops_list_u55}" portable_ops_list_u85="aten::permute_copy.out,aten::convolution.out,aten::relu.out,aten::_native_batch_norm_legit_no_training.out,aten::as_strided_copy.out,aten::mean.out,aten::full_like.out,aten::bmm.out,aten::scalar_tensor.out,aten::index.Tensor_out,aten::where.self_out,dim_order_ops::_to_dim_order_copy.out"