[QNN EP] Revert workarounds for problems in old QNN versions (#25171)

qti-jkilpatrick · jeffkilpatrick · web-flow · commit 18282b1e0372 · 2025-06-25T20:41:00.000-07:00
### Description

* Re-enable tests and remove workarounds that were introduced as part of a QNN <= 2.31 upgrade but are no longer necessary.


### Motivation and Context

QNN/QAIRT releases about once a month. As ONNX Runtime adopts these new versions, some number of tests are often found to be impacted.
Consequently, tests are skipped and tolerances are loosened. This change reverts as many of those workarounds as possible that were made for QNN upgrades between 2.17 and 2.31, inclusive. The most recent few releases were intentionally not examined to minimize impact on users on old versions and to avoid lock-in to the bleeding edge.

---------

Co-authored-by: Jeff Kilpatrick <jkilpat@qti.qualcomm.com>
diff --git a/onnxruntime/test/onnx/TestCase.cc b/onnxruntime/test/onnx/TestCase.cc
@@ -1393,59 +1393,7 @@ std::unique_ptr<std::set<BrokenTest>> GetBrokenTests(const std::string& provider
   }
 
   if (provider_name == "qnn") {
-    broken_tests->insert({"gemm_default_no_bias", "result differs"});
     broken_tests->insert({"resize_downsample_scales_linear", "result differs"});
-    broken_tests->insert({"resize_downsample_scales_linear_antialias", "result differs"});
-    broken_tests->insert({"resize_downsample_sizes_linear_antialias", "result differs"});
-    broken_tests->insert({"sce_NCd1_mean_weight_negative_ii", "result differs"});
-    broken_tests->insert({"sce_NCd1_mean_weight_negative_ii_expanded", "result differs"});
-    broken_tests->insert({"sce_NCd1_mean_weight_negative_ii_log_prob", "result differs"});
-    broken_tests->insert({"sce_NCd1_mean_weight_negative_ii_log_prob_expanded", "result differs"});
-    broken_tests->insert({"sce_mean", "result differs"});
-    broken_tests->insert({"sce_mean_3d", "result differs"});
-    broken_tests->insert({"sce_mean_3d_expanded", "result differs"});
-    broken_tests->insert({"sce_mean_3d_log_prob", "result differs"});
-    broken_tests->insert({"sce_mean_3d_log_prob_expanded", "result differs"});
-    broken_tests->insert({"sce_mean_expanded", "result differs"});
-    broken_tests->insert({"sce_mean_log_prob", "result differs"});
-    broken_tests->insert({"sce_mean_log_prob_expanded", "result differs"});
-    broken_tests->insert({"sce_mean_no_weight_ii", "result differs"});
-    broken_tests->insert({"sce_mean_no_weight_ii_3d", "result differs"});
-    broken_tests->insert({"sce_mean_no_weight_ii_3d_expanded", "result differs"});
-    broken_tests->insert({"sce_mean_no_weight_ii_3d_log_prob", "result differs"});
-    broken_tests->insert({"sce_mean_no_weight_ii_3d_log_prob_expanded", "result differs"});
-    broken_tests->insert({"sce_mean_no_weight_ii_4d", "result differs"});
-    broken_tests->insert({"sce_mean_no_weight_ii_4d_expanded", "result differs"});
-    broken_tests->insert({"sce_mean_no_weight_ii_4d_log_prob", "result differs"});
-    broken_tests->insert({"sce_mean_no_weight_ii_4d_log_prob_expanded", "result differs"});
-    broken_tests->insert({"sce_mean_no_weight_ii_expanded", "result differs"});
-    broken_tests->insert({"sce_mean_no_weight_ii_log_prob", "result differs"});
-    broken_tests->insert({"sce_mean_no_weight_ii_log_prob_expanded", "result differs"});
-    broken_tests->insert({"sce_mean_weight", "result differs"});
-    broken_tests->insert({"sce_mean_weight_expanded", "result differs"});
-    broken_tests->insert({"sce_mean_weight_ii", "result differs"});
-    broken_tests->insert({"sce_mean_weight_ii_3d", "result differs"});
-    broken_tests->insert({"sce_mean_weight_ii_3d_expanded", "result differs"});
-    broken_tests->insert({"sce_mean_weight_ii_3d_log_prob", "result differs"});
-    broken_tests->insert({"sce_mean_weight_ii_3d_log_prob_expanded", "result differs"});
-    broken_tests->insert({"sce_mean_weight_ii_4d", "result differs"});
-    broken_tests->insert({"sce_mean_weight_ii_4d_expanded", "result differs"});
-    broken_tests->insert({"sce_mean_weight_ii_4d_log_prob", "result differs"});
-    broken_tests->insert({"sce_mean_weight_ii_4d_log_prob_expanded", "result differs"});
-    broken_tests->insert({"sce_mean_weight_ii_expanded", "result differs"});
-    broken_tests->insert({"sce_mean_weight_ii_log_prob", "result differs"});
-    broken_tests->insert({"sce_mean_weight_ii_log_prob_expanded", "result differs"});
-    broken_tests->insert({"sce_mean_weight_log_prob", "result differs"});
-    broken_tests->insert({"sce_mean_weight_log_prob_expanded", "result differs"});
-    broken_tests->insert({"sce_none", "result differs"});
-    broken_tests->insert({"sce_none_expanded", "result differs"});
-    broken_tests->insert({"sce_none_log_prob", "result differs"});
-    broken_tests->insert({"sce_none_log_prob_expanded", "result differs"});
-    broken_tests->insert({"sce_sum", "result differs"});
-    broken_tests->insert({"sce_sum_expanded", "result differs"});
-    broken_tests->insert({"sce_sum_log_prob", "result differs"});
-    broken_tests->insert({"sce_sum_log_prob_expanded", "result differs"});
-    broken_tests->insert({"gridsample_reflection_padding", "result differs"});
     broken_tests->insert({"gridsample_volumetric_nearest_align_corners_0", "unknown version"});
     broken_tests->insert({"gridsample_volumetric_nearest_align_corners_1", "unknown version"});
     broken_tests->insert({"rotary_embedding", "unknown version"});
@@ -1454,9 +1402,7 @@ std::unique_ptr<std::set<BrokenTest>> GetBrokenTests(const std::string& provider
     broken_tests->insert({"rotary_embedding_no_position_ids_expanded", "unknown version"});
     broken_tests->insert({"rotary_embedding_no_position_ids_interleaved", "unknown version"});
     broken_tests->insert({"rotary_embedding_no_position_ids_interleaved_expanded", "unknown version"});
-    broken_tests->insert({"spacetodepth", "result differs"});
-    broken_tests->insert({"reduce_sum_square_empty_set_expanded", "unknown version"});
-    // Fails with QNN SDK 2.17.0:
+    // Fails since QNN SDK 2.17.0:
     // expected 7.70947 (40f6b3f3), got 7.84096 (40fae920), diff: 0.131491, tol=0.00870947 idx=419. 100 of 1715 differ
     broken_tests->insert({"facedetection_op8_qdq", "result differs"});
     // Fails with QNN SDK 2.34.0:
@@ -1466,11 +1412,6 @@ std::unique_ptr<std::set<BrokenTest>> GetBrokenTests(const std::string& provider
     broken_tests->insert({"mobilenetv2-1.0", "result differs with 2.34"});
     broken_tests->insert({"facedetection_op8", "segfault with CPU backend, will be fixed by QNN 2.36"});
 
-#if defined(_WIN32) && defined(_M_AMD64)
-    // Fails with QNN SDK 2.17.0 on Windows x64:
-    // expected 13.5 (41580000), got 0 (0), diff: 13.5, tol=0.0145 idx=3. 3 of 4 differ
-    broken_tests->insert({"averagepool_2d_ceil", "result differs"});
-#endif
     // These next 3 Resize tests fail on CPU backend with QNN SDK 2.22.0 due to inaccuracy.
     // output=Y:expected 1 (3f800000), got 3 (40400000), diff: 2, tol=0.002 idx=24. 8 of 56 differ
     broken_tests->insert({"resize_upsample_sizes_nearest", "result differs"});
@@ -1482,12 +1423,6 @@ std::unique_ptr<std::set<BrokenTest>> GetBrokenTests(const std::string& provider
     broken_tests->insert({"convtranspose_group_2_image_3", "Segmentation fault (core dumped). CPU test passed."});
     // Fails with QNN 2.31 on Windows x64 for CPU
     broken_tests->insert({"gelu_tanh_2", "y:expected -0.0131778 (bc57e7d5), got -0.0136333 (bc5f5e38), diff: 0.000455472, tol=2.31778e-05."});
-    broken_tests->insert({"convtranspose_pad", "Access violation 0xc000005 from call graphAddNode."});
-    broken_tests->insert({"convtranspose_pads", "Access violation 0xc000005 from call graphAddNode."});
-    broken_tests->insert({"convtranspose_output_shape", "Access violation 0xc000005 from call graphAddNode."});
-    broken_tests->insert({"convtranspose_kernel_shape", "Access violation 0xc000005 from call graphAddNode."});
-    broken_tests->insert({"convtranspose_1d", "Access violation 0xc000005 from call graphAddNode."});
-    broken_tests->insert({"convtranspose", "Access violation 0xc000005 from call graphAddNode."});
     broken_tests->insert({"averagepool_2d_ceil", "result differs. expected 13.5 (41580000), got 0 (0)"});
     // Fails with QNN 2.32
     broken_tests->insert({"resize_upsample_scales_linear", "expected 1 (3f800000), got 0.25 (3e800000)"});
diff --git a/onnxruntime/test/providers/qnn/average_pool_test.cc b/onnxruntime/test/providers/qnn/average_pool_test.cc
@@ -142,9 +142,7 @@ TEST_F(QnnHTPBackendTests, AveragePool_CountIncludePad_HTP_u8) {
                                    {utils::MakeAttribute("kernel_shape", std::vector<int64_t>{1, 1}),
                                     utils::MakeAttribute("count_include_pad", static_cast<int64_t>(1))},
                                    ExpectedEPNodeAssignment::All,
-                                   18,
-                                   // Need tolerance of 0.414% of output range after QNN SDK 2.17
-                                   QDQTolerance(0.00414f));
+                                   18);
 }
 
 // QDQ AveragePool that use auto_pad 'SAME_UPPER'.
@@ -157,9 +155,7 @@ TEST_F(QnnHTPBackendTests, AveragePool_AutopadSameUpper_HTP_u8) {
                                    {utils::MakeAttribute("kernel_shape", std::vector<int64_t>{1, 1}),
                                     utils::MakeAttribute("auto_pad", "SAME_UPPER")},
                                    ExpectedEPNodeAssignment::All,
-                                   18,
-                                   // Need to use tolerance of 0.414% of output range after QNN SDK 2.17
-                                   QDQTolerance(0.00414f));
+                                   18);
 }
 
 // QDQ AveragePool that use auto_pad 'SAME_LOWER'.
@@ -172,9 +168,7 @@ TEST_F(QnnHTPBackendTests, AveragePool_AutopadSameLower_HTP_u8) {
                                    {utils::MakeAttribute("kernel_shape", std::vector<int64_t>{1, 1}),
                                     utils::MakeAttribute("auto_pad", "SAME_LOWER")},
                                    ExpectedEPNodeAssignment::All,
-                                   18,
-                                   // Need to use tolerance of 0.414% of output range after QNN SDK 2.17
-                                   QDQTolerance(0.00414f));
+                                   18);
 }
 
 #endif  // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
diff --git a/onnxruntime/test/providers/qnn/conv_test.cc b/onnxruntime/test/providers/qnn/conv_test.cc
@@ -708,9 +708,7 @@ TEST_F(QnnHTPBackendTests, DISABLED_Test_QDQConvWithDynamicWeightsFromMul) {
   RunQnnModelTest(BuildConvMulGraph,
                   provider_options,
                   13,
-                  ExpectedEPNodeAssignment::All,
-                  4e-4f);  // Accuracy decreased slightly in QNN SDK 2.17.
-                           // Expected: 9.94500065, Actual: 9.94537735
+                  ExpectedEPNodeAssignment::All);
 }
 
 // Check that QNN compiles DQ -> Conv -> Q as a single unit.
@@ -727,9 +725,7 @@ TEST_F(QnnHTPBackendTests, ConvU8U8S32_bias_dynamic_input) {
                                      "NOTSET",
                                      ExpectedEPNodeAssignment::All,
                                      false,  // use_qdq_contrib_ops
-                                     13,     // opset
-                                     // Need tolerance of 0.413% of output range after QNN SDK 2.17
-                                     QDQTolerance(0.00413f));
+                                     13);    // opset
 
   RunHTPConvOpTest<uint8_t, uint8_t>("Conv",
                                      TestInputDef<float>({1, 1, 5, 5, 5}, false, 0.0f, 10.0f),   // Random dynamic input
@@ -742,9 +738,7 @@ TEST_F(QnnHTPBackendTests, ConvU8U8S32_bias_dynamic_input) {
                                      "NOTSET",
                                      ExpectedEPNodeAssignment::All,
                                      false,  // use_qdq_contrib_ops
-                                     13,     // opset
-                                     // Need tolerance of 0.413% of output range after QNN SDK 2.17
-                                     QDQTolerance(0.00413f));
+                                     13);    // opset
 }
 
 // Test per-channel QDQ Conv. in0: u8, in1 (weight): s8, in2 (bias): s32, out: u8
@@ -1911,9 +1905,7 @@ TEST_F(QnnHTPBackendTests, ConvU8U8S32_bias_initializer) {
                                      "NOTSET",
                                      ExpectedEPNodeAssignment::All,
                                      false,  // use_qdq_contrib_ops
-                                     13,     // opset
-                                     // Need tolerance of 0.413% of output range after QNN SDK 2.17
-                                     QDQTolerance(0.00413f));
+                                     13);    // opset
 
   RunHTPConvOpTest<uint8_t, uint8_t>("Conv",
                                      TestInputDef<float>({1, 1, 5, 5, 5}, false, 0.0f, 10.0f),   // Random dynamic input
@@ -1926,9 +1918,7 @@ TEST_F(QnnHTPBackendTests, ConvU8U8S32_bias_initializer) {
                                      "NOTSET",
                                      ExpectedEPNodeAssignment::All,
                                      false,  // use_qdq_contrib_ops
-                                     13,     // opset
-                                     // Need tolerance of 0.413% of output range after QNN SDK 2.17
-                                     QDQTolerance(0.00413f));
+                                     13);    // opset
 }
 
 // Tests 1D Conv with bias as an initializer.
@@ -2136,12 +2126,6 @@ TEST_F(QnnHTPBackendTests, DISABLED_ConvU8U8S32_large_input1_padding_bias_initia
 }
 
 TEST_F(QnnHTPBackendTests, ConvU8U8S32_large_input2_bias_initializer) {
-#ifdef __linux__
-  // On Linux QNN SDK 2.17: Need a tolerance of 0.785% of output range to pass.
-  QDQTolerance tolerance = QDQTolerance(0.00785f);
-#else
-  QDQTolerance tolerance = QDQTolerance();
-#endif
   RunHTPConvOpTest<uint8_t, uint8_t>("Conv",
                                      TestInputDef<float>({1, 128, 8, 56}, false, 0.f, 10.f),  // Dynamic input
                                      TestInputDef<float>({32, 128, 1, 1}, true, -1.f, 1.f),   // Random static weights
@@ -2153,8 +2137,7 @@ TEST_F(QnnHTPBackendTests, ConvU8U8S32_large_input2_bias_initializer) {
                                      "NOTSET",
                                      ExpectedEPNodeAssignment::All,
                                      false,
-                                     13,
-                                     tolerance);
+                                     13);
 }
 
 TEST_F(QnnHTPBackendTests, ConvU8U8S32_LargeInput_Dilations_Pads) {
diff --git a/onnxruntime/test/providers/qnn/gemm_op_test.cc b/onnxruntime/test/providers/qnn/gemm_op_test.cc
@@ -336,8 +336,7 @@ TEST_F(QnnHTPBackendTests, Gemm_Broadcast_Bias_DynamicInputs) {
                                         ExpectedEPNodeAssignment::All,
                                         13,
                                         false,
-                                        // Require tolerance of 0.74% on Windows ARM64.
-                                        QDQTolerance(0.0074f));
+                                        QDQTolerance(0.00410f));
 }
 
 TEST_F(QnnHTPBackendTests, Gemm_Broadcast_Bias_DynamicA_StaticB_DynamicC) {
@@ -356,8 +355,7 @@ TEST_F(QnnHTPBackendTests, Gemm_Broadcast_Bias_DynamicA_StaticB_DynamicC) {
                                         ExpectedEPNodeAssignment::All,
                                         13,
                                         false,
-                                        // Require tolerance of 0.74% on Windows ARM64.
-                                        QDQTolerance(0.0074f));
+                                        QDQTolerance(0.00410f));
 }
 
 TEST_F(QnnHTPBackendTests, Gemm_Broadcast_Bias_DynamicA_StaticB_StaticC) {
@@ -376,8 +374,7 @@ TEST_F(QnnHTPBackendTests, Gemm_Broadcast_Bias_DynamicA_StaticB_StaticC) {
                                         ExpectedEPNodeAssignment::All,
                                         13,
                                         false,
-                                        // Require tolerance of 0.74% on Windows ARM64.
-                                        QDQTolerance(0.0074f));
+                                        QDQTolerance(0.00410f));
 }
 
 // Test 16-bit QDQ Gemm with dynamic inputs A and Bias. The B input is an initializer.
diff --git a/onnxruntime/test/providers/qnn/layer_norm_test.cc b/onnxruntime/test/providers/qnn/layer_norm_test.cc
@@ -32,13 +32,7 @@ static void RunLayerNormCpuTest(const TestInputDef<float>& input_def,
                   expected_ep_assignment);
 }
 
-#ifdef __linux__
-// This CPU test fails on Linux, QNN SDK 2.17
-// the value pair (-1.75661933, 0) at index #1 don't match, which is 1.75662 from -1.75662
-TEST_F(QnnCPUBackendTests, DISABLED_LayerNorm) {
-#else
 TEST_F(QnnCPUBackendTests, LayerNorm) {
-#endif
   RunLayerNormCpuTest(TestInputDef<float>({2, 3}, false, GetFloatDataInRange(0.0f, 10.0f, 6)),
                       TestInputDef<float>({2, 3}, false, GetFloatDataInRange(0.0f, 10.0f, 6)),
                       {utils::MakeAttribute("axis", static_cast<int64_t>(0))},
@@ -210,7 +204,7 @@ TEST_F(QnnHTPBackendTests, LayerNorm1D_LastAxis_StaticScale_AU16_WU8) {
 
 // Test accuracy of 8-bit QDQ LayerNorm with a dynamic scale input.
 //
-// TODO(adrianlizarraga): Fails to finalize with QNN SDK 2.22. Still fails on QNN SDK 2.28.2.
+// TODO(adrianlizarraga): Fails to finalize with QNN SDK 2.22. Still fails on QNN SDK 2.35.0.
 // Verbose logs:
 // Starting stage: Graph Transformations and Optimizations
 // C:\...\QNN\HTP\HTP\src\hexagon\prepare\graph_prepare.cc:203:ERROR:could not create op: q::flat_to_vtcm
diff --git a/onnxruntime/test/providers/qnn/lrn_op_test.cc b/onnxruntime/test/providers/qnn/lrn_op_test.cc
@@ -149,20 +149,13 @@ TEST_F(QnnHTPBackendTests, LRNSize5) {
 }
 
 TEST_F(QnnHTPBackendTests, LRN_size_larger_than_channel) {
-#ifdef __linux__
-  // On Linux QNN SDK 2.17: Need a tolerance of 0.407% of output range to pass.
-  QDQTolerance tolerance = QDQTolerance(0.00407f);
-#else
-  QDQTolerance tolerance = QDQTolerance();
-#endif
   RunQDQLRNOpTest<uint8_t>(TestInputDef<float>({1, 128, 4, 5}, false, -10.0f, 10.0f),
                            255,  // Size
                            ExpectedEPNodeAssignment::All,
                            0.0001f,  // alpha
                            0.75f,    // beta
                            1.0f,     // bias
-                           13,       // opset
-                           tolerance);
+                           13);
 }
 
 #endif  // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
diff --git a/onnxruntime/test/providers/qnn/matmul_test.cpp b/onnxruntime/test/providers/qnn/matmul_test.cpp
@@ -194,13 +194,7 @@ TEST_F(QnnCPUBackendTests, MatMulOp) {
   RunMatMulOpTest(false, {2, 3, 3, 3}, {3, 2}, false, true);
   RunMatMulOpTest(false, {2, 3, 3, 3}, {2, 3, 3, 2}, false, true);
 
-#if defined(__linux__)
-  // TODO: This fails on Linux (HTP emulation). Works on Windows ARM64.
-  // Expected: contains 24 values, where each value and its corresponding value in 16-byte object <18-00 00-00 00-00 00-00 00-29 4E-53 A8-55 00-00> are an almost-equal pair
-  // Actual: 16-byte object <18-00 00-00 00-00 00-00 80-28 3E-53 A8-55 00-00>, where the value pair (0.0285999943, 0) at index #12 don't match, which is -0.0286 from 0.0286
-#else
   RunMatMulOpTest(false, {2, 1, 2, 3}, {3, 3, 2}, false, false);
-#endif
   RunMatMulOpTest(false, {3}, {3}, false, false);
   RunMatMulOpTest(false, {3}, {3}, false, true);
   RunMatMulOpTest(false, {3}, {3}, true, false);
@@ -285,7 +279,7 @@ TEST_F(QnnHTPBackendTests, MatMulOp_QDQ) {
   // UINT16, per-channel INT8 weight
   RunQDQPerChannelMatMulOpTest<uint16_t, int8_t, uint16_t>({2, 3}, {3, 2}, 1, QDQTolerance(),
                                                            ExpectedEPNodeAssignment::All, 21, false, false);
-  RunQDQPerChannelMatMulOpTest<uint16_t, int8_t, uint16_t>({2, 3, 3}, {3}, -1, QDQTolerance(0.005f));
+  RunQDQPerChannelMatMulOpTest<uint16_t, int8_t, uint16_t>({2, 3, 3}, {3}, -1, QDQTolerance(0.0041f));
 }
 
 // Tests MatMul with two uint16 (quantized) inputs that are both dynamic.
diff --git a/onnxruntime/test/providers/qnn/pool_op_test.cpp b/onnxruntime/test/providers/qnn/pool_op_test.cpp
@@ -182,10 +182,8 @@ TEST_F(QnnHTPBackendTests, MaxPool_Large_Input_HTP_u8) {
                              utils::MakeAttribute("storage_order", static_cast<int64_t>(0)),
                              utils::MakeAttribute("auto_pad", "NOTSET")},
                             ExpectedEPNodeAssignment::All,
-                            18,     // opset
-                            false,  // use_contrib_qdq_ops
-                            // Need a tolerance of 0.417% of output range after QNN SDK 2.17
-                            QDQTolerance(0.00417f));
+                            18,      // opset
+                            false);  // use_contrib_qdq_ops
 }
 
 TEST_F(QnnHTPBackendTests, MaxPool1D_ReshapeNodesPresent) {
diff --git a/onnxruntime/test/providers/qnn/resize_test.cc b/onnxruntime/test/providers/qnn/resize_test.cc