3 | 3 | - The sample uses the QNN EP to:
4 | 4 | - a. run the float32 model on the QNN CPU backend.
5 | 5 | - b. run the QDQ model on the HTP backend with qnn_context_cache_enable=1, which also generates an ONNX model with the QNN context binary embedded.
 | 6 | +    - Model inputs & outputs will be float32
6 | 7 | - c. run the QNN context binary model generated by ONNX Runtime (previous step) on the HTP backend, to improve model initialization time and reduce memory overhead.
7 | 8 | - d. run the QNN context binary model generated by the QNN tool chain on the HTP backend, to support models generated by the native QNN tool chain.
 | 9 | +    - Model inputs & outputs will be quantized INT8, with the kitten_input_nhwc.raw input prepared per the qnn-onnx-converter.exe options used
 | 10 | +      E.g. qnn-onnx-converter.exe --input_dtype uint8 --input_layout NHWC
 | 11 | +      See QNN doc - docs/QNN/general/tools.html#qnn-onnx-converter
8 | 12 | - The sample downloads the mobilenetv2 model from the ONNX model zoo and uses mobilenetv2_helper.py to quantize the float32 model to a QDQ model, which is required for the HTP backend.
9 | 13 | - The sample is targeted to run on a Qualcomm ARM64 device.
10 | 14 | - There are 2 ways to improve session creation time by using a QNN context binary:
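To make steps b and c above concrete, here is a minimal sketch of creating a session with the QNN EP through the C++ API, using the qnn_context_cache_enable option named in this README. This is a sketch only, assuming the Microsoft.ML.OnnxRuntime.QNN 1.17 C++ API; the backend library name (QnnHtp.dll) and the model file name are illustrative, not taken from the sample source.

```cpp
#include <onnxruntime_cxx_api.h>

#include <string>
#include <unordered_map>

int main() {
  Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "qnn_ep_sample");
  Ort::SessionOptions session_options;

  // QNN EP options. "backend_path" selects the backend library:
  // QnnCpu.dll for the float32 path (step a), QnnHtp.dll for steps b-d.
  std::unordered_map<std::string, std::string> qnn_options;
  qnn_options["backend_path"] = "QnnHtp.dll";
  // Step b: also dump an ONNX model with the compiled QNN context binary
  // embedded, so later sessions (step c) can skip QNN graph compilation.
  qnn_options["qnn_context_cache_enable"] = "1";

  session_options.AppendExecutionProvider("QNN", qnn_options);

  // Illustrative file name; the sample's quantized QDQ model may differ.
  Ort::Session session(env, L"mobilenetv2-12.quant.onnx", session_options);
  return 0;
}
```

Running the session once is enough to produce the context-embedded model; a later session created on that generated file (step c) initializes faster because the QNN graph is already compiled.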
41 | 45 | - Windows 11
42 | 46 | - Visual Studio 2022
43 | 47 | - Python (needed to quantize the model)
44 |  | -- OnnxRuntime ARM Build with initial QNN support such as ONNX Runtime (ORT) Microsoft.ML.OnnxRuntime.QNN 1.15+
 | 48 | +- Qualcomm AI Engine Direct SDK (QNN SDK) from https://qpm.qualcomm.com/main/tools/details/qualcomm_ai_engine_direct
 | 49 | +  - Last known working QNN version: 2.14.1.230828
 | 50 | +- OnnxRuntime ARM Build with QNN support such as ONNX Runtime (ORT) Microsoft.ML.OnnxRuntime.QNN 1.17+
45 | 51 | - Download from https://www.nuget.org/packages/Microsoft.ML.OnnxRuntime.QNN and unzip
46 | 52 | - The ORT drop DOES NOT INCLUDE QNN, so the QNN binaries must be copied from the QC SDK. E.g.
47 |  | - - robocopy C:\Qualcomm\AIStack\QNN\2.15.1.230926\lib\aarch64-windows-msvc %USERPROFILE%\Downloads\microsoft.ml.onnxruntime.qnn.1.16.0\runtimes\win-arm64\native
48 |  | - - copy C:\Qualcomm\AIStack\QNN\2.15.1.230926\lib\hexagon-v68\unsigned\libQnnHtpV68Skel.so %USERPROFILE%\Downloads\microsoft.ml.onnxruntime.qnn.1.16.0\runtimes\win-arm64\native
49 |  | - - copy C:\Qualcomm\AIStack\QNN\2.15.1.230926\lib\hexagon-v73\unsigned\libQnnHtpV73Skel.so %USERPROFILE%\Downloads\microsoft.ml.onnxruntime.qnn.1.16.0\runtimes\win-arm64\native
 | 53 | + - robocopy C:\Qualcomm\AIStack\QNN\2.14.1.230828\lib\aarch64-windows-msvc %USERPROFILE%\Downloads\microsoft.ml.onnxruntime.qnn.1.17.0\runtimes\win-arm64\native
 | 54 | + - copy C:\Qualcomm\AIStack\QNN\2.14.1.230828\lib\hexagon-v68\unsigned\libQnnHtpV68Skel.so %USERPROFILE%\Downloads\microsoft.ml.onnxruntime.qnn.1.17.0\runtimes\win-arm64\native
 | 55 | + - copy C:\Qualcomm\AIStack\QNN\2.14.1.230828\lib\hexagon-v73\unsigned\libQnnHtpV73Skel.so %USERPROFILE%\Downloads\microsoft.ml.onnxruntime.qnn.1.17.0\runtimes\win-arm64\native
50 | 56 | - (OR) Compiled from onnxruntime source with QNN support - https://onnxruntime.ai/docs/build/eps.html#qnn
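After copying the QNN binaries (or building from source), it can help to verify that the onnxruntime binary in use actually includes the QNN EP before running the sample. A small sketch against the public C++ API, not part of the sample itself:

```cpp
#include <onnxruntime_cxx_api.h>

#include <iostream>
#include <string>

int main() {
  // Lists the execution providers compiled into the loaded onnxruntime
  // binary; a QNN-enabled build should print "QNNExecutionProvider".
  for (const std::string& provider : Ort::GetAvailableProviders()) {
    std::cout << provider << "\n";
  }
  return 0;
}
```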
51 | 57 |
52 | 58 | ## How to run the application
53 | 59 | (Windows 11) Run ```run_qnn_ep_sample.bat``` with the path to the onnxruntime root directory (for includes) and the path to the directory containing the ORT binaries
54 | 60 | ```
55 | 61 | run_qnn_ep_sample.bat PATH_TO_ORT_ROOT_WITH_INCLUDE_FOLDER PATH_TO_ORT_BINARIES_WITH_QNN
56 |  | -Example (Drop): run_qnn_ep_sample.bat %USERPROFILE%\Downloads\microsoft.ml.onnxruntime.qnn.1.16.0\build\native %USERPROFILE%\Downloads\microsoft.ml.onnxruntime.qnn.1.16.0\runtimes\win-arm64\native
 | 62 | +Example (Drop): run_qnn_ep_sample.bat %USERPROFILE%\Downloads\microsoft.ml.onnxruntime.qnn.1.17.0\build\native %USERPROFILE%\Downloads\microsoft.ml.onnxruntime.qnn.1.17.0\runtimes\win-arm64\native
57 | 63 | Example (Src): run_qnn_ep_sample.bat C:\src\onnxruntime C:\src\onnxruntime\build\Windows\RelWithDebInfo\RelWithDebInfo
58 | 64 | ```
59 | 65 |
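For reference, this is roughly the inference call the sample makes once a session exists (any of steps a-d). A hedged sketch: the {1, 3, 224, 224} float32 input shape is an assumption matching MobileNetV2 from the ONNX model zoo, and input/output names are queried from the model rather than hard-coded.

```cpp
#include <onnxruntime_cxx_api.h>

#include <array>
#include <vector>

// Runs one inference, assuming a single float32 input of shape
// {1, 3, 224, 224} and a single float32 output (MobileNetV2-style I/O).
std::vector<float> RunOnce(Ort::Session& session,
                           const std::vector<float>& image_chw) {
  Ort::AllocatorWithDefaultOptions allocator;
  // Query the real input/output names instead of hard-coding them.
  auto input_name = session.GetInputNameAllocated(0, allocator);
  auto output_name = session.GetOutputNameAllocated(0, allocator);

  const std::array<int64_t, 4> shape{1, 3, 224, 224};
  Ort::MemoryInfo memory_info =
      Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
  Ort::Value input = Ort::Value::CreateTensor<float>(
      memory_info, const_cast<float*>(image_chw.data()), image_chw.size(),
      shape.data(), shape.size());

  const char* input_names[] = {input_name.get()};
  const char* output_names[] = {output_name.get()};
  auto outputs = session.Run(Ort::RunOptions{nullptr}, input_names, &input, 1,
                             output_names, 1);

  // Copy the raw scores out; the caller can then pick the top-1 class.
  const float* scores = outputs[0].GetTensorData<float>();
  size_t count = outputs[0].GetTensorTypeAndShapeInfo().GetElementCount();
  return std::vector<float>(scores, scores + count);
}
```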