Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit db3a05a

Browse files
feat: support pull and load vision model (#2061)
* feat: support pull and load vision model * fix: discard metadata for vision models --------- Co-authored-by: sangjanai <[email protected]>
1 parent 5ce2805 commit db3a05a

File tree

5 files changed

+72
-34
lines changed

5 files changed

+72
-34
lines changed

engine/config/model_config.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ struct ModelConfig {
135135
bool text_model = std::numeric_limits<bool>::quiet_NaN();
136136
std::string id;
137137
std::vector<std::string> files;
138+
std::string mmproj;
138139
std::size_t created;
139140
std::string object;
140141
std::string owned_by = "";
@@ -338,6 +339,9 @@ struct ModelConfig {
338339
files_array.append(file);
339340
}
340341
obj["files"] = files_array;
342+
if (!mmproj.empty()) {
343+
obj["mmproj"] = mmproj;
344+
}
341345

342346
obj["created"] = static_cast<Json::UInt64>(created);
343347
obj["object"] = object;

engine/config/yaml_config.cc

Lines changed: 43 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,13 @@ void YamlHandler::ReadYamlFile(const std::string& file_path) {
2121

2222
try {
2323
yaml_node_ = YAML::LoadFile(file_path);
24+
auto nomalize_path = [](std::string p) {
25+
std::replace(p.begin(), p.end(), '\\', '/');
26+
return p;
27+
};
2428
// in case of model.yml file, we don't have files yet, create them
2529
if (!yaml_node_["files"]) {
26-
auto s = file_path;
27-
// normalize path
28-
std::replace(s.begin(), s.end(), '\\', '/');
30+
auto s = nomalize_path(file_path);
2931
std::vector<std::string> v;
3032
if (yaml_node_["engine"] &&
3133
(yaml_node_["engine"].as<std::string>() == kLlamaRepo ||
@@ -41,6 +43,18 @@ void YamlHandler::ReadYamlFile(const std::string& file_path) {
4143
// TODO(any) need to support multiple gguf files
4244
yaml_node_["files"] = v;
4345
}
46+
47+
// add mmproj file to yml if exists
48+
if (!yaml_node_["mmproj"]) {
49+
auto s = nomalize_path(file_path);
50+
auto abs_path = s.substr(0, s.find_last_of('/')) + "/mmproj.gguf";
51+
CTL_DBG("mmproj: " << abs_path);
52+
auto rel_path = fmu::ToRelativeCortexDataPath(fs::path(abs_path));
53+
if (std::filesystem::exists(abs_path)) {
54+
yaml_node_["mmproj"] = rel_path.string();
55+
}
56+
}
57+
4458
} catch (const YAML::BadFile& e) {
4559
throw;
4660
}
@@ -131,6 +145,8 @@ void YamlHandler::ModelConfigFromYaml() {
131145
tmp.stop = yaml_node_["stop"].as<std::vector<std::string>>();
132146
if (yaml_node_["files"])
133147
tmp.files = yaml_node_["files"].as<std::vector<std::string>>();
148+
if (yaml_node_["mmproj"])
149+
tmp.mmproj = yaml_node_["mmproj"].as<std::string>();
134150
if (yaml_node_["created"])
135151
tmp.created = yaml_node_["created"].as<std::size_t>();
136152

@@ -239,6 +255,9 @@ void YamlHandler::UpdateModelConfig(ModelConfig new_model_config) {
239255
if (model_config_.files.size() > 0)
240256
yaml_node_["files"] = model_config_.files;
241257

258+
if (!model_config_.mmproj.empty())
259+
yaml_node_["mmproj"] = model_config_.mmproj;
260+
242261
if (!std::isnan(static_cast<double>(model_config_.seed)))
243262
yaml_node_["seed"] = model_config_.seed;
244263
if (!std::isnan(model_config_.dynatemp_range))
@@ -301,17 +320,21 @@ void YamlHandler::WriteYamlFile(const std::string& file_path) const {
301320
"Model ID which is used for request construct - should be "
302321
"unique between models (author / quantization)");
303322
out_file << format_utils::WriteKeyValue("name", yaml_node_["name"],
304-
"metadata.general.name");
323+
"metadata.general.name");
305324
if (yaml_node_["version"]) {
306-
out_file << "version: " << yaml_node_["version"].as<std::string>() << "\n";
325+
out_file << "version: " << yaml_node_["version"].as<std::string>()
326+
<< "\n";
307327
}
308328
if (yaml_node_["files"] && yaml_node_["files"].size()) {
309329
out_file << "files: # Can be relative OR absolute local file "
310-
"path\n";
330+
"path\n";
311331
for (const auto& source : yaml_node_["files"]) {
312332
out_file << " - " << source << "\n";
313333
}
314334
}
335+
if (yaml_node_["mmproj"]) {
336+
out_file << "mmproj: " << yaml_node_["mmproj"].as<std::string>() << "\n";
337+
}
315338

316339
out_file << "# END GENERAL GGUF METADATA\n";
317340
out_file << "\n";
@@ -330,9 +353,9 @@ void YamlHandler::WriteYamlFile(const std::string& file_path) const {
330353
out_file << "# BEGIN OPTIONAL\n";
331354
out_file << format_utils::WriteKeyValue("size", yaml_node_["size"]);
332355
out_file << format_utils::WriteKeyValue("stream", yaml_node_["stream"],
333-
"Default true?");
356+
"Default true?");
334357
out_file << format_utils::WriteKeyValue("top_p", yaml_node_["top_p"],
335-
"Ranges: 0 to 1");
358+
"Ranges: 0 to 1");
336359
out_file << format_utils::WriteKeyValue(
337360
"temperature", yaml_node_["temperature"], "Ranges: 0 to 1");
338361
out_file << format_utils::WriteKeyValue(
@@ -344,26 +367,26 @@ void YamlHandler::WriteYamlFile(const std::string& file_path) const {
344367
"Should be default to context length");
345368
out_file << format_utils::WriteKeyValue("seed", yaml_node_["seed"]);
346369
out_file << format_utils::WriteKeyValue("dynatemp_range",
347-
yaml_node_["dynatemp_range"]);
370+
yaml_node_["dynatemp_range"]);
348371
out_file << format_utils::WriteKeyValue("dynatemp_exponent",
349-
yaml_node_["dynatemp_exponent"]);
372+
yaml_node_["dynatemp_exponent"]);
350373
out_file << format_utils::WriteKeyValue("top_k", yaml_node_["top_k"]);
351374
out_file << format_utils::WriteKeyValue("min_p", yaml_node_["min_p"]);
352375
out_file << format_utils::WriteKeyValue("tfs_z", yaml_node_["tfs_z"]);
353376
out_file << format_utils::WriteKeyValue("typ_p", yaml_node_["typ_p"]);
354377
out_file << format_utils::WriteKeyValue("repeat_last_n",
355-
yaml_node_["repeat_last_n"]);
378+
yaml_node_["repeat_last_n"]);
356379
out_file << format_utils::WriteKeyValue("repeat_penalty",
357-
yaml_node_["repeat_penalty"]);
380+
yaml_node_["repeat_penalty"]);
358381
out_file << format_utils::WriteKeyValue("mirostat", yaml_node_["mirostat"]);
359382
out_file << format_utils::WriteKeyValue("mirostat_tau",
360-
yaml_node_["mirostat_tau"]);
383+
yaml_node_["mirostat_tau"]);
361384
out_file << format_utils::WriteKeyValue("mirostat_eta",
362-
yaml_node_["mirostat_eta"]);
385+
yaml_node_["mirostat_eta"]);
363386
out_file << format_utils::WriteKeyValue("penalize_nl",
364-
yaml_node_["penalize_nl"]);
387+
yaml_node_["penalize_nl"]);
365388
out_file << format_utils::WriteKeyValue("ignore_eos",
366-
yaml_node_["ignore_eos"]);
389+
yaml_node_["ignore_eos"]);
367390
out_file << format_utils::WriteKeyValue("n_probs", yaml_node_["n_probs"]);
368391
out_file << format_utils::WriteKeyValue("min_keep", yaml_node_["min_keep"]);
369392
out_file << format_utils::WriteKeyValue("grammar", yaml_node_["grammar"]);
@@ -374,7 +397,7 @@ void YamlHandler::WriteYamlFile(const std::string& file_path) const {
374397
out_file << "# BEGIN MODEL LOAD PARAMETERS\n";
375398
out_file << "# BEGIN REQUIRED\n";
376399
out_file << format_utils::WriteKeyValue("engine", yaml_node_["engine"],
377-
"engine to run model");
400+
"engine to run model");
378401
out_file << "prompt_template:";
379402
out_file << " " << yaml_node_["prompt_template"] << "\n";
380403
out_file << "# END REQUIRED\n";
@@ -384,11 +407,11 @@ void YamlHandler::WriteYamlFile(const std::string& file_path) const {
384407
"ctx_len", yaml_node_["ctx_len"],
385408
"llama.context_length | 0 or undefined = loaded from model");
386409
out_file << format_utils::WriteKeyValue("n_parallel",
387-
yaml_node_["n_parallel"]);
410+
yaml_node_["n_parallel"]);
388411
out_file << format_utils::WriteKeyValue("cpu_threads",
389-
yaml_node_["cpu_threads"]);
412+
yaml_node_["cpu_threads"]);
390413
out_file << format_utils::WriteKeyValue("ngl", yaml_node_["ngl"],
391-
"Undefined = loaded from model");
414+
"Undefined = loaded from model");
392415
out_file << "# END OPTIONAL\n";
393416
out_file << "# END MODEL LOAD PARAMETERS\n";
394417

engine/controllers/models.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -533,8 +533,8 @@ void Models::StartModel(
533533
auto model_handle = (*(req->getJsonObject())).get("model", "").asString();
534534

535535
std::optional<std::string> mmproj;
536-
if (auto& o = (*(req->getJsonObject()))["mmproj"]; !o.isNull()) {
537-
mmproj = o.asString();
536+
if (auto& o = (*(req->getJsonObject())); o.isMember("mmproj")) {
537+
mmproj = o["mmproj"].asString();
538538
}
539539

540540
auto bypass_llama_model_path = false;

engine/services/hardware_service.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,7 @@ void HardwareService::UpdateHardwareInfos() {
304304
};
305305
for (auto const& he : b.value()) {
306306
if (!exists(he.uuid)) {
307-
db_service_->DeleteHardwareEntry(he.uuid);
307+
(void)db_service_->DeleteHardwareEntry(he.uuid);
308308
}
309309
}
310310

engine/services/model_service.cc

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -155,8 +155,8 @@ ModelService::ModelService(std::shared_ptr<DatabaseService> db_service,
155155
inference_svc_(inference_service),
156156
engine_svc_(engine_svc),
157157
task_queue_(task_queue) {
158-
// ProcessBgrTasks();
159-
};
158+
// ProcessBgrTasks();
159+
};
160160

161161
void ModelService::ForceIndexingModelList() {
162162
CTL_INF("Force indexing model list");
@@ -947,6 +947,15 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
947947
LOG_WARN << "model_path is empty";
948948
return StartModelResult{.success = false};
949949
}
950+
if (!mc.mmproj.empty()) {
951+
#if defined(_WIN32)
952+
json_data["mmproj"] = cortex::wc::WstringToUtf8(
953+
fmu::ToAbsoluteCortexDataPath(fs::path(mc.mmproj)).wstring());
954+
#else
955+
json_data["mmproj"] =
956+
fmu::ToAbsoluteCortexDataPath(fs::path(mc.mmproj)).string();
957+
#endif
958+
}
950959
json_data["system_prompt"] = mc.system_template;
951960
json_data["user_prompt"] = mc.user_template;
952961
json_data["ai_prompt"] = mc.ai_template;
@@ -996,16 +1005,18 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
9961005
auto data = std::get<1>(ir);
9971006

9981007
if (status == drogon::k200OK) {
999-
// start model successfully, we store the metadata so we can use
1008+
// start model successfully; if it's not a vision model, we store the metadata so we can use
10001009
// for each inference
1001-
auto metadata_res = GetModelMetadata(model_handle);
1002-
if (metadata_res.has_value()) {
1003-
loaded_model_metadata_map_.emplace(model_handle,
1004-
std::move(metadata_res.value()));
1005-
CTL_INF("Successfully stored metadata for model " << model_handle);
1006-
} else {
1007-
CTL_WRN("Failed to get metadata for model " << model_handle << ": "
1008-
<< metadata_res.error());
1010+
if (!json_data.isMember("mmproj") || json_data["mmproj"].isNull()) {
1011+
auto metadata_res = GetModelMetadata(model_handle);
1012+
if (metadata_res.has_value()) {
1013+
loaded_model_metadata_map_.emplace(model_handle,
1014+
std::move(metadata_res.value()));
1015+
CTL_INF("Successfully stored metadata for model " << model_handle);
1016+
} else {
1017+
CTL_WRN("Failed to get metadata for model " << model_handle << ": "
1018+
<< metadata_res.error());
1019+
}
10091020
}
10101021

10111022
return StartModelResult{.success = true,

0 commit comments

Comments
 (0)