Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit db3a05a

Browse files
feat: support pull and load vision model (#2061)
* feat: support pull and load vision model * fix: discard metadata for vision models --------- Co-authored-by: sangjanai <[email protected]>
1 parent 5ce2805 commit db3a05a

File tree

5 files changed

+72
-34
lines changed

5 files changed

+72
-34
lines changed

engine/config/model_config.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ struct ModelConfig {
135135
bool text_model = std::numeric_limits<bool>::quiet_NaN();
136136
std::string id;
137137
std::vector<std::string> files;
138+
std::string mmproj;
138139
std::size_t created;
139140
std::string object;
140141
std::string owned_by = "";
@@ -338,6 +339,9 @@ struct ModelConfig {
338339
files_array.append(file);
339340
}
340341
obj["files"] = files_array;
342+
if (!mmproj.empty()) {
343+
obj["mmproj"] = mmproj;
344+
}
341345

342346
obj["created"] = static_cast<Json::UInt64>(created);
343347
obj["object"] = object;

engine/config/yaml_config.cc

Lines changed: 43 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,13 @@ void YamlHandler::ReadYamlFile(const std::string& file_path) {
2121

2222
try {
2323
yaml_node_ = YAML::LoadFile(file_path);
24+
auto nomalize_path = [](std::string p) {
25+
std::replace(p.begin(), p.end(), '\\', '/');
26+
return p;
27+
};
2428
// in case of model.yml file, we don't have files yet, create them
2529
if (!yaml_node_["files"]) {
26-
auto s = file_path;
27-
// normalize path
28-
std::replace(s.begin(), s.end(), '\\', '/');
30+
auto s = nomalize_path(file_path);
2931
std::vector<std::string> v;
3032
if (yaml_node_["engine"] &&
3133
(yaml_node_["engine"].as<std::string>() == kLlamaRepo ||
@@ -41,6 +43,18 @@ void YamlHandler::ReadYamlFile(const std::string& file_path) {
4143
// TODO(any) need to support multiple gguf files
4244
yaml_node_["files"] = v;
4345
}
46+
47+
// add mmproj file to yml if exists
48+
if (!yaml_node_["mmproj"]) {
49+
auto s = nomalize_path(file_path);
50+
auto abs_path = s.substr(0, s.find_last_of('/')) + "/mmproj.gguf";
51+
CTL_DBG("mmproj: " << abs_path);
52+
auto rel_path = fmu::ToRelativeCortexDataPath(fs::path(abs_path));
53+
if (std::filesystem::exists(abs_path)) {
54+
yaml_node_["mmproj"] = rel_path.string();
55+
}
56+
}
57+
4458
} catch (const YAML::BadFile& e) {
4559
throw;
4660
}
@@ -131,6 +145,8 @@ void YamlHandler::ModelConfigFromYaml() {
131145
tmp.stop = yaml_node_["stop"].as<std::vector<std::string>>();
132146
if (yaml_node_["files"])
133147
tmp.files = yaml_node_["files"].as<std::vector<std::string>>();
148+
if (yaml_node_["mmproj"])
149+
tmp.mmproj = yaml_node_["mmproj"].as<std::string>();
134150
if (yaml_node_["created"])
135151
tmp.created = yaml_node_["created"].as<std::size_t>();
136152

@@ -239,6 +255,9 @@ void YamlHandler::UpdateModelConfig(ModelConfig new_model_config) {
239255
if (model_config_.files.size() > 0)
240256
yaml_node_["files"] = model_config_.files;
241257

258+
if (!model_config_.mmproj.empty())
259+
yaml_node_["mmproj"] = model_config_.mmproj;
260+
242261
if (!std::isnan(static_cast<double>(model_config_.seed)))
243262
yaml_node_["seed"] = model_config_.seed;
244263
if (!std::isnan(model_config_.dynatemp_range))
@@ -301,17 +320,21 @@ void YamlHandler::WriteYamlFile(const std::string& file_path) const {
301320
"Model ID which is used for request construct - should be "
302321
"unique between models (author / quantization)");
303322
out_file << format_utils::WriteKeyValue("name", yaml_node_["name"],
304-
"metadata.general.name");
323+
"metadata.general.name");
305324
if (yaml_node_["version"]) {
306-
out_file << "version: " << yaml_node_["version"].as<std::string>() << "\n";
325+
out_file << "version: " << yaml_node_["version"].as<std::string>()
326+
<< "\n";
307327
}
308328
if (yaml_node_["files"] && yaml_node_["files"].size()) {
309329
out_file << "files: # Can be relative OR absolute local file "
310-
"path\n";
330+
"path\n";
311331
for (const auto& source : yaml_node_["files"]) {
312332
out_file << " - " << source << "\n";
313333
}
314334
}
335+
if (yaml_node_["mmproj"]) {
336+
out_file << "mmproj: " << yaml_node_["mmproj"].as<std::string>() << "\n";
337+
}
315338

316339
out_file << "# END GENERAL GGUF METADATA\n";
317340
out_file << "\n";
@@ -330,9 +353,9 @@ void YamlHandler::WriteYamlFile(const std::string& file_path) const {
330353
out_file << "# BEGIN OPTIONAL\n";
331354
out_file << format_utils::WriteKeyValue("size", yaml_node_["size"]);
332355
out_file << format_utils::WriteKeyValue("stream", yaml_node_["stream"],
333-
"Default true?");
356+
"Default true?");
334357
out_file << format_utils::WriteKeyValue("top_p", yaml_node_["top_p"],
335-
"Ranges: 0 to 1");
358+
"Ranges: 0 to 1");
336359
out_file << format_utils::WriteKeyValue(
337360
"temperature", yaml_node_["temperature"], "Ranges: 0 to 1");
338361
out_file << format_utils::WriteKeyValue(
@@ -344,26 +367,26 @@ void YamlHandler::WriteYamlFile(const std::string& file_path) const {
344367
"Should be default to context length");
345368
out_file << format_utils::WriteKeyValue("seed", yaml_node_["seed"]);
346369
out_file << format_utils::WriteKeyValue("dynatemp_range",
347-
yaml_node_["dynatemp_range"]);
370+
yaml_node_["dynatemp_range"]);
348371
out_file << format_utils::WriteKeyValue("dynatemp_exponent",
349-
yaml_node_["dynatemp_exponent"]);
372+
yaml_node_["dynatemp_exponent"]);
350373
out_file << format_utils::WriteKeyValue("top_k", yaml_node_["top_k"]);
351374
out_file << format_utils::WriteKeyValue("min_p", yaml_node_["min_p"]);
352375
out_file << format_utils::WriteKeyValue("tfs_z", yaml_node_["tfs_z"]);
353376
out_file << format_utils::WriteKeyValue("typ_p", yaml_node_["typ_p"]);
354377
out_file << format_utils::WriteKeyValue("repeat_last_n",
355-
yaml_node_["repeat_last_n"]);
378+
yaml_node_["repeat_last_n"]);
356379
out_file << format_utils::WriteKeyValue("repeat_penalty",
357-
yaml_node_["repeat_penalty"]);
380+
yaml_node_["repeat_penalty"]);
358381
out_file << format_utils::WriteKeyValue("mirostat", yaml_node_["mirostat"]);
359382
out_file << format_utils::WriteKeyValue("mirostat_tau",
360-
yaml_node_["mirostat_tau"]);
383+
yaml_node_["mirostat_tau"]);
361384
out_file << format_utils::WriteKeyValue("mirostat_eta",
362-
yaml_node_["mirostat_eta"]);
385+
yaml_node_["mirostat_eta"]);
363386
out_file << format_utils::WriteKeyValue("penalize_nl",
364-
yaml_node_["penalize_nl"]);
387+
yaml_node_["penalize_nl"]);
365388
out_file << format_utils::WriteKeyValue("ignore_eos",
366-
yaml_node_["ignore_eos"]);
389+
yaml_node_["ignore_eos"]);
367390
out_file << format_utils::WriteKeyValue("n_probs", yaml_node_["n_probs"]);
368391
out_file << format_utils::WriteKeyValue("min_keep", yaml_node_["min_keep"]);
369392
out_file << format_utils::WriteKeyValue("grammar", yaml_node_["grammar"]);
@@ -374,7 +397,7 @@ void YamlHandler::WriteYamlFile(const std::string& file_path) const {
374397
out_file << "# BEGIN MODEL LOAD PARAMETERS\n";
375398
out_file << "# BEGIN REQUIRED\n";
376399
out_file << format_utils::WriteKeyValue("engine", yaml_node_["engine"],
377-
"engine to run model");
400+
"engine to run model");
378401
out_file << "prompt_template:";
379402
out_file << " " << yaml_node_["prompt_template"] << "\n";
380403
out_file << "# END REQUIRED\n";
@@ -384,11 +407,11 @@ void YamlHandler::WriteYamlFile(const std::string& file_path) const {
384407
"ctx_len", yaml_node_["ctx_len"],
385408
"llama.context_length | 0 or undefined = loaded from model");
386409
out_file << format_utils::WriteKeyValue("n_parallel",
387-
yaml_node_["n_parallel"]);
410+
yaml_node_["n_parallel"]);
388411
out_file << format_utils::WriteKeyValue("cpu_threads",
389-
yaml_node_["cpu_threads"]);
412+
yaml_node_["cpu_threads"]);
390413
out_file << format_utils::WriteKeyValue("ngl", yaml_node_["ngl"],
391-
"Undefined = loaded from model");
414+
"Undefined = loaded from model");
392415
out_file << "# END OPTIONAL\n";
393416
out_file << "# END MODEL LOAD PARAMETERS\n";
394417

engine/controllers/models.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -533,8 +533,8 @@ void Models::StartModel(
533533
auto model_handle = (*(req->getJsonObject())).get("model", "").asString();
534534

535535
std::optional<std::string> mmproj;
536-
if (auto& o = (*(req->getJsonObject()))["mmproj"]; !o.isNull()) {
537-
mmproj = o.asString();
536+
if (auto& o = (*(req->getJsonObject())); o.isMember("mmproj")) {
537+
mmproj = o["mmproj"].asString();
538538
}
539539

540540
auto bypass_llama_model_path = false;

engine/services/hardware_service.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,7 @@ void HardwareService::UpdateHardwareInfos() {
304304
};
305305
for (auto const& he : b.value()) {
306306
if (!exists(he.uuid)) {
307-
db_service_->DeleteHardwareEntry(he.uuid);
307+
(void)db_service_->DeleteHardwareEntry(he.uuid);
308308
}
309309
}
310310

engine/services/model_service.cc

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -155,8 +155,8 @@ ModelService::ModelService(std::shared_ptr<DatabaseService> db_service,
155155
inference_svc_(inference_service),
156156
engine_svc_(engine_svc),
157157
task_queue_(task_queue) {
158-
// ProcessBgrTasks();
159-
};
158+
// ProcessBgrTasks();
159+
};
160160

161161
void ModelService::ForceIndexingModelList() {
162162
CTL_INF("Force indexing model list");
@@ -947,6 +947,15 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
947947
LOG_WARN << "model_path is empty";
948948
return StartModelResult{.success = false};
949949
}
950+
if (!mc.mmproj.empty()) {
951+
#if defined(_WIN32)
952+
json_data["mmproj"] = cortex::wc::WstringToUtf8(
953+
fmu::ToAbsoluteCortexDataPath(fs::path(mc.mmproj)).wstring());
954+
#else
955+
json_data["mmproj"] =
956+
fmu::ToAbsoluteCortexDataPath(fs::path(mc.mmproj)).string();
957+
#endif
958+
}
950959
json_data["system_prompt"] = mc.system_template;
951960
json_data["user_prompt"] = mc.user_template;
952961
json_data["ai_prompt"] = mc.ai_template;
@@ -996,16 +1005,18 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
9961005
auto data = std::get<1>(ir);
9971006

9981007
if (status == drogon::k200OK) {
999-
// start model successfully, we store the metadata so we can use
1008+
// start model successfully; if it's not a vision model, we store the metadata so we can use
10001009
// for each inference
1001-
auto metadata_res = GetModelMetadata(model_handle);
1002-
if (metadata_res.has_value()) {
1003-
loaded_model_metadata_map_.emplace(model_handle,
1004-
std::move(metadata_res.value()));
1005-
CTL_INF("Successfully stored metadata for model " << model_handle);
1006-
} else {
1007-
CTL_WRN("Failed to get metadata for model " << model_handle << ": "
1008-
<< metadata_res.error());
1010+
if (!json_data.isMember("mmproj") || json_data["mmproj"].isNull()) {
1011+
auto metadata_res = GetModelMetadata(model_handle);
1012+
if (metadata_res.has_value()) {
1013+
loaded_model_metadata_map_.emplace(model_handle,
1014+
std::move(metadata_res.value()));
1015+
CTL_INF("Successfully stored metadata for model " << model_handle);
1016+
} else {
1017+
CTL_WRN("Failed to get metadata for model " << model_handle << ": "
1018+
<< metadata_res.error());
1019+
}
10091020
}
10101021

10111022
return StartModelResult{.success = true,

0 commit comments

Comments
 (0)