diff --git a/runtime/onert/api/nnfw/include/nnfw_experimental.h b/runtime/onert/api/nnfw/include/nnfw_experimental.h index 27dc0c6f7dd..7ac10f95958 100644 --- a/runtime/onert/api/nnfw/include/nnfw_experimental.h +++ b/runtime/onert/api/nnfw/include/nnfw_experimental.h @@ -558,6 +558,58 @@ NNFW_STATUS nnfw_set_codegen_model_path(nnfw_session *session, const char *path) */ NNFW_STATUS nnfw_codegen(nnfw_session *session, const char *target, NNFW_CODEGEN_PREF pref); +/** + * @brief Set MinMax records count in auto compilation mode with on-device compiler + * + * This function set MinMax records count for quantization in auto compilation mode. + * To enable automatic compilation mode, use {@link nnfw_run_with_auto_compilation} + * + * @param[in] session nnfw_session + * @param[in] minmax_records_count minmax records count + * @return @c NNFW_STATUS_NO_ERROR if successful, otherwise return @c NNFW_STATUS_ERROR + */ +NNFW_STATUS nnfw_set_odc_param_minmax_records_count(nnfw_session *session, + int minmax_records_count); + +/** + * @brief Delete MinMax file for on-device compiler + * + * @param[in] session nnfw_session + * @return @c NNFW_STATUS_NO_ERROR if successful, otherwise return @c NNFW_STATUS_ERROR + */ +NNFW_STATUS nnfw_odc_delete_minmax_file(nnfw_session *session); + +/** + * @brief Run inference with auto compilation + * + *
This function runs inference with automatic compilation and replaces + * the original model with a quantized or compiled model inside. + * During the inference the minmax statistics are collected and after that quantization is performed. + * If quantization was successful, code generation for the target backend is attempted, otherwise run the original + float model. + * If compilation was successful, run compiled model, otherwise run quantized model. + * On-device compiler (ODC) provides quantization and compilation functionality. + * Function should be called after model is loaded by {@link nnfw_load_model_from_file}, + * session is prepared for inference by {@link nnfw_prepare}, set input and output buffers + * by {@link nnfw_set_input} and {@link nnfw_set_output}. + * + * Additionally the following parameters should be set up : + * 1. Quantization type {@link nnfw_set_quantization_type } + * 2. Quantized model path {@link nnfw_set_quantized_model_path } + * 3. Minmax records threshold for quantization {@link nnfw_set_odc_param_minmax_records_count } + * 4. File with minMax statistics can be removed by {@link nnfw_odc_delete_minmax_file} + * 5. Compiled model path {@link nnfw_set_codegen_model_path} + *
+ * + * @param[in] session nnfw_session + * @param[in] target Target backend to generate code as in {@link nnfw_codegen} + * @param[in] pref @c NNFW_CODEGEN_PREF + + * @return @c NNFW_STATUS_NO_ERROR if successful, otherwise return @c NNFW_STATUS_ERROR + */ +NNFW_STATUS nnfw_run_with_auto_compilation(nnfw_session *session, const char *target, + NNFW_CODEGEN_PREF pref); + ////////////////////////////////////////////// // APIs for configuration ////////////////////////////////////////////// diff --git a/runtime/onert/api/nnfw/src/nnfw_api.cc b/runtime/onert/api/nnfw/src/nnfw_api.cc index 7720ed27584..ea217c50e9f 100644 --- a/runtime/onert/api/nnfw/src/nnfw_api.cc +++ b/runtime/onert/api/nnfw/src/nnfw_api.cc @@ -508,6 +508,25 @@ NNFW_STATUS nnfw_codegen(nnfw_session *session, const char *target, NNFW_CODEGEN return session->codegen(target, pref); } +NNFW_STATUS nnfw_set_odc_param_minmax_records_count(nnfw_session *session, int minmax_records_count) +{ + NNFW_RETURN_ERROR_IF_NULL(session); + return session->set_odc_param_minmax_records_count(minmax_records_count); +} + +NNFW_STATUS nnfw_odc_delete_minmax_file(nnfw_session *session) +{ + NNFW_RETURN_ERROR_IF_NULL(session); + return session->delete_odc_minmax_file(); +} + +NNFW_STATUS nnfw_run_with_auto_compilation(nnfw_session *session, const char *target, + NNFW_CODEGEN_PREF pref) +{ + NNFW_RETURN_ERROR_IF_NULL(session); + return session->run_with_auto_compilation(target, pref); +} + // Configuration NNFW_STATUS nnfw_set_prepare_config(nnfw_session *session, const NNFW_PREPARE_CONFIG key, diff --git a/runtime/onert/api/nnfw/src/nnfw_api_internal.cc b/runtime/onert/api/nnfw/src/nnfw_api_internal.cc index 084579490fb..3a579c2ab59 100644 --- a/runtime/onert/api/nnfw/src/nnfw_api_internal.cc +++ b/runtime/onert/api/nnfw/src/nnfw_api_internal.cc @@ -2028,3 +2028,300 @@ NNFW_STATUS nnfw_session::reset_execute_config() return NNFW_STATUS_NO_ERROR; } + +NNFW_STATUS nnfw_session::set_odc_param_minmax_records_count(int 
minmax_records_count) +{ + if (isStateInitialized() || isStateRunning()) + { + std::cerr << "invalid state" << std::endl; + return NNFW_STATUS_INVALID_STATE; + } + + if (_quant_manager->setMinMaxRecordsThreshold(minmax_records_count)) + return NNFW_STATUS_NO_ERROR; + else + return NNFW_STATUS_ERROR; +} + +NNFW_STATUS nnfw_session::delete_odc_minmax_file() +{ + if (isStateRunning()) + { + std::cerr << "invalid state" << std::endl; + return NNFW_STATUS_INVALID_STATE; + } + + if (_quant_manager->deleteMinMaxFile()) + return NNFW_STATUS_NO_ERROR; + else + return NNFW_STATUS_ERROR; +} + +// run with auto compilation +NNFW_STATUS nnfw_session::run_with_auto_compilation(const char *target, NNFW_CODEGEN_PREF pref) +{ + + if (!isStatePreparedOrFinishedRun()) + { + std::cerr << "Error during nnfw_session::run_with_auto_compilation : " + << "run should be after preparation" << std::endl; + return NNFW_STATUS_INVALID_STATE; + } + + // Check quantization and code-generation parameters + std::string target_str{target}; + if (_quant_manager->exportModelPath().empty() || _codegen_manager->exportModelPath().empty() || + target_str.empty() || target_str.substr(target_str.size() - 4) != "-gen") + { + std::cerr << "Error during nnfw_session::run_with_auto_compilation : " + << "quantization and code generation parameters should be set" << std::endl; + return NNFW_STATUS_INVALID_STATE; + } + + // Odc: auto compilation with hidden switching mechanizm + // Check is model already quantized or compiled + std::ifstream file_quantized_model(_quant_manager->exportModelPath()); + std::ifstream file_compiled_model(_codegen_manager->exportModelPath()); + + if (!file_quantized_model.good() && !file_compiled_model.good()) + { + // Run float model and try to quantize it + { + // Save execution options + auto saved_options = _execution->executionOptions(); + // turn on minmax recording + _execution->executionOptions().dump_minmax = true; + + try + { + _execution->execute(); + } + catch (const 
onert::InsufficientBufferSizeException &e) + { + // Currently insufficient buffer always means output buffer. + std::cerr << "Error during nnfw_session::run_with_auto_compilation : " << e.what() + << std::endl; + return NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE; + } + catch (const std::exception &e) + { + std::cerr << "Error during nnfw_session::run_with_auto_compilation : " << e.what() + << std::endl; + return NNFW_STATUS_ERROR; + } + + _state = State::FINISHED_RUN; + + // restore min_max option to user defined state + _execution->executionOptions().dump_minmax = saved_options.dump_minmax; + + // if enough statistics are collected, then run the quantization + if (_quant_manager->readyForQuantize()) + { + try + { + if (isStateInitialized() || isStateRunning()) + { + std::cerr << "invalid state" << std::endl; + return NNFW_STATUS_INVALID_STATE; + } + + auto result = _quant_manager->quantize(_model_path); + if (!result) + return NNFW_STATUS_INVALID_STATE; + + // remove minmax file + result = _quant_manager->deleteMinMaxFile(); + if (!result) + return NNFW_STATUS_INVALID_STATE; + } + catch (const std::exception &e) + { + std::cerr + << "Error during nnfw_session::run_with_auto_compilation in quantize operation: " + << e.what() << std::endl; + return NNFW_STATUS_ERROR; + } + } + } + } + else + { + // run compiled or quantized model + NNFW_STATUS status; + + // turn off minmax recording + _execution->executionOptions().dump_minmax = false; + + // save initial buffers if quantized model or compiled model is not loaded + if (_autoCompilationState == nnfw_session::AutoCompilationState::INITIAL_STATE) + { + auto dotidx = _codegen_manager->exportModelPath().rfind('.'); + if (dotidx == std::string::npos) + { + std::cerr << "Error during nnfw_session::run_with_auto_compilation : Invalid compiled " + "model path. Please use a " + "path that includes the extension." 
+ << std::endl; + return NNFW_STATUS_ERROR; + } + + std::string compiled_model_type = + _codegen_manager->exportModelPath().substr(dotidx + 1); // + 1 to exclude dot + + dotidx = _quant_manager->exportModelPath().rfind('.'); + if (dotidx == std::string::npos) + { + std::cerr << "Error during nnfw_session::run_with_auto_compilation : Invalid quantized " + "model path. Please use a " + "path that includes the extension." + << std::endl; + return NNFW_STATUS_ERROR; + } + std::string quantized_model_type = + _quant_manager->exportModelPath().substr(dotidx + 1); // + 1 to exclude dot + + // Save initial (float) input and output buffers + auto input_size = _compiler_artifact->_executors->inputSize(); + auto output_size = _compiler_artifact->_executors->outputSize(); + + std::vector