Commit 19c5d73

Merge pull request #2041 from janhq/s/chore/sync-dev-to-main
chore: sync dev to main
2 parents 3cc5c17 + 8e7d17c commit 19c5d73

20 files changed: +494 -286 lines

docs/docs/architecture/cortex-db.mdx (+7 -7)

```diff
@@ -16,12 +16,12 @@ This document outlines Cortex database architecture which is designed to store a
 files and more.
 
 ## Table Structure
-### schema Table
-The `schema` table is designed to hold schema version for cortex database. Below is the structure of the table:
+### schema_version Table
+The `schema_version` table is designed to hold schema version for cortex database. Below is the structure of the table:
 
 | Column Name | Data Type | Description |
 |--------------------|-----------|---------------------------------------------------------|
-| schema_version | INTEGER | A unique schema version for database. |
+| version | INTEGER | A unique schema version for database. |
 
 ### models Table
 The `models` table is designed to hold metadata about various AI models. Below is the structure of the table:
@@ -63,10 +63,10 @@ Below is the structure of the table:
 | api_key | TEXT | |
 | url | TEXT | |
 | version | TEXT | The current version of the engine. |
-| variant | TEXT | |
+| variant | TEXT | A string that specifies the specific configuration or build variant of the engine. |
 | status | TEXT | Current status of the engine (e.g., "downloaded", "downloadable"). |
 | metadata | TEXT | Additional metadata or information about the engine. |
-| date_ceated | TEXT | Date when the engine was downloaded. |
+| date_created | TEXT | Date when the engine was downloaded. |
 | date_updated | TEXT | Date when the engine was last updated. |
 
 ### files Table
@@ -78,5 +78,5 @@ The `files` table is designed to hold metadata about objects dowloaded via Corte
 | object | TEXT | The type of hardware. |
 | purpose | TEXT | Purpose of file |
 | filename | TEXT | The name of the file. |
-| created_at | INTEGER | Date when file was created |
-| bytes | INTEGER | |
+| created_at | INTEGER | Date when file was created. |
+| bytes | INTEGER | Size of the file on disk in bytes. |
```
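
Note: the rename changes both the table and the column, so anything reading the version directly must move from `SELECT schema_version FROM schema` to `SELECT version FROM schema_version`. A minimal sketch with the SQLite C API — assuming the cortex database is a plain SQLite file (the path below is a placeholder):

```cpp
// Sketch: read the schema version after this rename.
// Assumes a SQLite database with a `schema_version` table holding a single
// INTEGER `version` column, exactly as documented in the diff above.
#include <sqlite3.h>
#include <cstdio>

int main() {
  sqlite3* db = nullptr;
  if (sqlite3_open("cortex.db", &db) != SQLITE_OK)  // placeholder path
    return 1;

  sqlite3_stmt* stmt = nullptr;
  const char* sql = "SELECT version FROM schema_version;";  // was: SELECT schema_version FROM schema
  if (sqlite3_prepare_v2(db, sql, -1, &stmt, nullptr) == SQLITE_OK &&
      sqlite3_step(stmt) == SQLITE_ROW) {
    std::printf("schema version: %d\n", sqlite3_column_int(stmt, 0));
  }
  sqlite3_finalize(stmt);
  sqlite3_close(db);
  return 0;
}
```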

engine/common/hardware_common.h (+11)

```diff
@@ -69,6 +69,16 @@ struct NvidiaAddInfo {
 };
 struct AmdAddInfo {};
 using GPUAddInfo = std::variant<NvidiaAddInfo, AmdAddInfo>;
+
+enum class GpuType {
+  kGpuTypeOther = 0,
+  kGpuTypeIntegrated = 1,
+  kGpuTypeDiscrete = 2,
+  kGpuTypeVirtual = 3,
+  kGpuTypeCpu = 4,
+  kGpuTypeMaxEnum = 0x7FFFFFFF
+};
+
 struct GPU {
   std::string id;
   uint32_t device_id;
@@ -80,6 +90,7 @@ struct GPU {
   std::string uuid;
   bool is_activated = true;
   std::string vendor;
+  GpuType gpu_type;
 };
 
 inline Json::Value ToJson(const std::vector<GPU>& gpus) {
```
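
The new `GpuType` field classifies devices so that callers (see `hardware_service.cc` below) can treat integrated, virtual, and CPU devices differently from discrete GPUs. A small to-string helper like the following can be handy for logs or JSON; it is a hypothetical addition, not part of this commit:

```cpp
// Hypothetical helper (not in this commit): readable label for GpuType.
// Assumes the enum lives in namespace cortex::hw, as the usage in
// hardware_service.cc suggests.
#include <string_view>
#include "common/hardware_common.h"

constexpr std::string_view ToString(cortex::hw::GpuType type) {
  using cortex::hw::GpuType;
  switch (type) {
    case GpuType::kGpuTypeIntegrated: return "integrated";
    case GpuType::kGpuTypeDiscrete:   return "discrete";
    case GpuType::kGpuTypeVirtual:    return "virtual";
    case GpuType::kGpuTypeCpu:        return "cpu";
    default:                          return "other";  // kGpuTypeOther, kGpuTypeMaxEnum
  }
}
```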

engine/controllers/models.cc (+4 -5)

```diff
@@ -218,11 +218,10 @@ void Models::ListModel(
       obj["id"] = model_entry.model;
       obj["model"] = model_entry.model;
       obj["status"] = "downloaded";
-      // TODO(sang) Temporarily remove this estimation
-      // auto es = model_service_->GetEstimation(model_entry.model);
-      // if (es.has_value() && !!es.value()) {
-      //   obj["recommendation"] = hardware::ToJson(*(es.value()));
-      // }
+      auto es = model_service_->GetEstimation(model_entry.model);
+      if (es.has_value()) {
+        obj["recommendation"] = hardware::ToJson(*es);
+      }
       data.append(std::move(obj));
       yaml_handler.Reset();
     } else if (model_config.engine == kPythonEngine) {
```

engine/main.cc (+6 -2)

```diff
@@ -37,6 +37,7 @@
 #include "utils/file_manager_utils.h"
 #include "utils/logging_utils.h"
 #include "utils/system_info_utils.h"
+#include "utils/task_queue.h"
 
 #if defined(__APPLE__) && defined(__MACH__)
 #include <libgen.h>  // for dirname()
@@ -177,8 +178,11 @@ void RunServer(std::optional<std::string> host, std::optional<int> port,
       download_service, dylib_path_manager, db_service);
   auto inference_svc = std::make_shared<InferenceService>(engine_service);
   auto model_src_svc = std::make_shared<ModelSourceService>(db_service);
-  auto model_service = std::make_shared<ModelService>(
-      db_service, hw_service, download_service, inference_svc, engine_service);
+  cortex::TaskQueue task_queue(
+      std::min(2u, std::thread::hardware_concurrency()), "background_task");
+  auto model_service =
+      std::make_shared<ModelService>(db_service, hw_service, download_service,
+                                     inference_svc, engine_service, task_queue);
   inference_svc->SetModelService(model_service);
 
   auto file_watcher_srv = std::make_shared<FileWatcherService>(
```
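
`utils/task_queue.h` itself is not shown on this page, but the call sites pin down its surface: a constructor taking a worker count (capped at two here) and a queue name, `RunInQueue` for one-shot tasks, and `RunEvery` for periodic ones. A minimal sketch consistent with that usage — an assumption for illustration, not the actual implementation:

```cpp
// Minimal TaskQueue sketch matching the calls visible in this commit
// (constructor, RunInQueue, RunEvery). The real cortex::TaskQueue may differ.
#include <algorithm>
#include <chrono>
#include <condition_variable>
#include <functional>
#include <mutex>
#include <queue>
#include <string>
#include <thread>
#include <vector>

namespace cortex {
class TaskQueue {
 public:
  TaskQueue(unsigned threads, std::string name) : name_(std::move(name)) {
    // Clamp to at least one worker: hardware_concurrency() may return 0.
    for (unsigned i = 0; i < std::max(threads, 1u); ++i)
      workers_.emplace_back([this] { Loop(); });
  }

  ~TaskQueue() {
    {
      std::lock_guard l(m_);
      done_ = true;
    }
    cv_.notify_all();
    for (auto& t : timers_) t.join();
    for (auto& w : workers_) w.join();
  }

  // Run a task once, on the next free worker.
  void RunInQueue(std::function<void()> task) {
    {
      std::lock_guard l(m_);
      tasks_.push(std::move(task));
    }
    cv_.notify_one();
  }

  // Re-enqueue a copy of the task at a fixed interval until shutdown.
  void RunEvery(std::chrono::seconds interval, std::function<void()> task) {
    timers_.emplace_back([this, interval, task = std::move(task)] {
      std::unique_lock l(m_);
      // wait_for keeps a fixed deadline per cycle; exits when done_ is set.
      while (!cv_.wait_for(l, interval, [this] { return done_; })) {
        tasks_.push(task);
        cv_.notify_one();
      }
    });
  }

 private:
  void Loop() {
    for (;;) {
      std::function<void()> task;
      {
        std::unique_lock l(m_);
        cv_.wait(l, [this] { return done_ || !tasks_.empty(); });
        if (done_ && tasks_.empty()) return;
        task = std::move(tasks_.front());
        tasks_.pop();
      }
      task();
    }
  }

  std::string name_;  // a real implementation would use this to name threads
  std::mutex m_;
  std::condition_variable cv_;
  std::queue<std::function<void()>> tasks_;
  std::vector<std::thread> workers_;
  std::vector<std::thread> timers_;
  bool done_ = false;
};
}  // namespace cortex
```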

engine/services/hardware_service.cc (+28 -14)

```diff
@@ -52,7 +52,7 @@ HardwareInfo HardwareService::GetHardwareInfo() {
     };
   }
 
-  return HardwareInfo{.cpu = cortex::hw::GetCPUInfo(),
+  return HardwareInfo{.cpu = cpu_info_.GetCPUInfo(),
                       .os = cortex::hw::GetOSInfo(),
                       .ram = cortex::hw::GetMemoryInfo(),
                       .storage = cortex::hw::GetStorageInfo(),
@@ -207,9 +207,6 @@ bool HardwareService::Restart(const std::string& host, int port) {
     if (!TryConnectToServer(host, port)) {
       return false;
     }
-    std::cout << "Server started" << std::endl;
-    std::cout << "API Documentation available at: http://" << host << ":"
-              << port << std::endl;
   }
 
 #endif
@@ -322,23 +319,40 @@ void HardwareService::UpdateHardwareInfos() {
       }
     }
     CTL_INF("Activated GPUs before: " << debug_b);
+    auto has_nvidia = [&gpus] {
+      for (auto const& g : gpus) {
+        if (g.vendor == cortex::hw::kNvidiaStr) {
+          return true;
+        }
+      }
+      return false;
+    }();
+
     for (auto const& gpu : gpus) {
-      // ignore error
-      // Note: only support NVIDIA for now, so hardware_id = software_id
       if (db_service_->HasHardwareEntry(gpu.uuid)) {
         auto res = db_service_->UpdateHardwareEntry(gpu.uuid, std::stoi(gpu.id),
-                                                   std::stoi(gpu.id));
+                                                    std::stoi(gpu.id));
         if (res.has_error()) {
          CTL_WRN(res.error());
         }
       } else {
-        auto res =
-            db_service_->AddHardwareEntry(HwEntry{.uuid = gpu.uuid,
-                                                  .type = "gpu",
-                                                  .hardware_id = std::stoi(gpu.id),
-                                                  .software_id = std::stoi(gpu.id),
-                                                  .activated = true,
-                                                  .priority = INT_MAX});
+        // iGPU should be deactivated by default
+        // Only activate Nvidia GPUs if both AMD and Nvidia GPUs exists
+        auto activated = [&gpu, &gpus, has_nvidia] {
+          if (gpu.gpu_type != cortex::hw::GpuType::kGpuTypeDiscrete)
+            return false;
+          if (has_nvidia && gpu.vendor != cortex::hw::kNvidiaStr)
+            return false;
+          return true;
+        };
+
+        auto res = db_service_->AddHardwareEntry(
+            HwEntry{.uuid = gpu.uuid,
+                    .type = "gpu",
+                    .hardware_id = std::stoi(gpu.id),
+                    .software_id = std::stoi(gpu.id),
+                    .activated = activated(),
+                    .priority = INT_MAX});
         if (res.has_error()) {
           CTL_WRN(res.error());
         }
```
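
Distilled, the new rule is: only discrete GPUs start activated, and in a mixed AMD + NVIDIA system only the NVIDIA ones do. The same predicate as a standalone function (a hypothetical refactor for illustration; the commit keeps it as a lambda):

```cpp
// Activation rule from UpdateHardwareInfos(), extracted for clarity.
#include "common/hardware_common.h"  // cortex::hw::GPU, GpuType, kNvidiaStr

bool ShouldActivateByDefault(const cortex::hw::GPU& gpu, bool has_nvidia) {
  // Integrated, virtual, and CPU devices stay deactivated by default.
  if (gpu.gpu_type != cortex::hw::GpuType::kGpuTypeDiscrete)
    return false;
  // When an NVIDIA GPU is present, non-NVIDIA discrete GPUs also start off.
  if (has_nvidia && gpu.vendor != cortex::hw::kNvidiaStr)
    return false;
  return true;
}
```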

engine/services/hardware_service.h (+2 -1)

```diff
@@ -1,8 +1,8 @@
 #pragma once
 #include <stdint.h>
+#include <mutex>
 #include <string>
 #include <vector>
-#include <mutex>
 
 #include "common/hardware_config.h"
 #include "database_service.h"
@@ -41,4 +41,5 @@ class HardwareService {
   std::shared_ptr<DatabaseService> db_service_ = nullptr;
   std::optional<cortex::hw::ActivateHardwareConfig> ahc_;
   std::mutex mtx_;
+  cortex::hw::CpuInfo cpu_info_;
 };
```

engine/services/model_service.cc (+60 -3)

```diff
@@ -143,6 +143,21 @@ cpp::result<DownloadTask, std::string> GetDownloadTask(
 }
 }  // namespace
 
+ModelService::ModelService(std::shared_ptr<DatabaseService> db_service,
+                           std::shared_ptr<HardwareService> hw_service,
+                           std::shared_ptr<DownloadService> download_service,
+                           std::shared_ptr<InferenceService> inference_service,
+                           std::shared_ptr<EngineServiceI> engine_svc,
+                           cortex::TaskQueue& task_queue)
+    : db_service_(db_service),
+      hw_service_(hw_service),
+      download_service_{download_service},
+      inference_svc_(inference_service),
+      engine_svc_(engine_svc),
+      task_queue_(task_queue) {
+  ProcessBgrTasks();
+};
+
 void ModelService::ForceIndexingModelList() {
   CTL_INF("Force indexing model list");
 
@@ -331,8 +346,17 @@ cpp::result<DownloadTask, std::string> ModelService::HandleDownloadUrlAsync(
   return download_service_->AddTask(downloadTask, on_finished);
 }
 
+std::optional<hardware::Estimation> ModelService::GetEstimation(
+    const std::string& model_handle) {
+  std::lock_guard l(es_mtx_);
+  if (auto it = es_.find(model_handle); it != es_.end()) {
+    return it->second;
+  }
+  return std::nullopt;
+}
+
 cpp::result<std::optional<hardware::Estimation>, std::string>
-ModelService::GetEstimation(const std::string& model_handle,
+ModelService::EstimateModel(const std::string& model_handle,
                             const std::string& kv_cache, int n_batch,
                             int n_ubatch) {
   namespace fs = std::filesystem;
@@ -548,7 +572,7 @@ ModelService::DownloadModelFromCortexsoAsync(
           // Close the file
           pyvenv_cfg.close();
           // Add executable permission to python
-          set_permission_utils::SetExecutePermissionsRecursive(venv_path);
+          (void)set_permission_utils::SetExecutePermissionsRecursive(venv_path);
         } else {
           CTL_ERR("Failed to extract venv.zip");
         };
@@ -828,7 +852,7 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
       CTL_WRN("Error: " + res.error());
       for (auto& depend : depends) {
         if (depend != model_handle) {
-          StopModel(depend);
+          auto sr = StopModel(depend);
         }
       }
       return cpp::fail("Model failed to start dependency '" + depend +
@@ -945,6 +969,11 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
 
     json_helper::MergeJson(json_data, params_override);
 
+    // Set default cpu_threads if it is not configured
+    if (!json_data.isMember("cpu_threads")) {
+      json_data["cpu_threads"] = GetCpuThreads();
+    }
+
     // Set the latest ctx_len
     if (ctx_len) {
       json_data["ctx_len"] =
@@ -1329,6 +1358,10 @@ ModelService::MayFallbackToCpu(const std::string& model_path, int ngl,
   return warning;
 }
 
+int ModelService::GetCpuThreads() const {
+  return std::max(std::thread::hardware_concurrency() / 2, 1u);
+}
+
 cpp::result<std::shared_ptr<ModelMetadata>, std::string>
 ModelService::GetModelMetadata(const std::string& model_id) const {
   if (model_id.empty()) {
@@ -1381,4 +1414,28 @@ std::string ModelService::GetEngineByModelId(
   auto mc = yaml_handler.GetModelConfig();
   CTL_DBG(mc.engine);
   return mc.engine;
+}
+
+void ModelService::ProcessBgrTasks() {
+  CTL_INF("Start processing background tasks")
+  auto cb = [this] {
+    CTL_DBG("Estimate model resource usage");
+    auto list_entry = db_service_->LoadModelList();
+    if (list_entry) {
+      for (const auto& model_entry : list_entry.value()) {
+        // Only process local models
+        if (model_entry.status == cortex::db::ModelStatus::Downloaded) {
+          auto es = EstimateModel(model_entry.model);
+          if (es.has_value()) {
+            std::lock_guard l(es_mtx_);
+            es_[model_entry.model] = es.value();
+          }
+        }
+      }
+    }
+  };
+
+  auto clone = cb;
+  task_queue_.RunInQueue(std::move(cb));
+  task_queue_.RunEvery(std::chrono::seconds(10), std::move(clone));
 }
```
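
Two details worth noting. First, the expensive GGUF-based estimation (renamed `EstimateModel`) now runs only on the background queue — once at startup and again every ten seconds — while the new `GetEstimation` is a mutex-guarded lookup into the `es_` cache, so `Models::ListModel` never blocks on file parsing. Second, the callback is copied into `clone` because both scheduling calls take the task by value and the code moves into each; the same shape in isolation (with the hypothetical queue sketched under `engine/main.cc`):

```cpp
// Sketch: scheduling the same job both immediately and periodically
// requires two copies, since each call consumes its argument.
#include <chrono>
#include <cstdio>

template <typename Queue>
void ScheduleEstimation(Queue& q) {
  auto cb = [] { std::puts("re-estimate all downloaded models"); };
  auto clone = cb;              // copy for the periodic schedule
  q.RunInQueue(std::move(cb));  // run once at startup
  q.RunEvery(std::chrono::seconds(10), std::move(clone));  // then every 10s
}
```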

engine/services/model_service.h (+15 -7)

```diff
@@ -10,6 +10,7 @@
 #include "services/download_service.h"
 #include "services/hardware_service.h"
 #include "utils/hardware/gguf/gguf_file_estimate.h"
+#include "utils/task_queue.h"
 
 class InferenceService;
 
@@ -35,12 +36,8 @@ class ModelService {
                std::shared_ptr<HardwareService> hw_service,
                std::shared_ptr<DownloadService> download_service,
                std::shared_ptr<InferenceService> inference_service,
-               std::shared_ptr<EngineServiceI> engine_svc)
-      : db_service_(db_service),
-        hw_service_(hw_service),
-        download_service_{download_service},
-        inference_svc_(inference_service),
-        engine_svc_(engine_svc) {};
+               std::shared_ptr<EngineServiceI> engine_svc,
+               cortex::TaskQueue& task_queue);
 
   cpp::result<std::string, std::string> AbortDownloadModel(
       const std::string& task_id);
@@ -81,7 +78,10 @@ class ModelService {
 
   bool HasModel(const std::string& id) const;
 
-  cpp::result<std::optional<hardware::Estimation>, std::string> GetEstimation(
+  std::optional<hardware::Estimation> GetEstimation(
+      const std::string& model_handle);
+
+  cpp::result<std::optional<hardware::Estimation>, std::string> EstimateModel(
       const std::string& model_handle, const std::string& kv_cache = "f16",
       int n_batch = 2048, int n_ubatch = 2048);
 
@@ -112,6 +112,10 @@ class ModelService {
       const std::string& model_path, int ngl, int ctx_len, int n_batch = 2048,
       int n_ubatch = 2048, const std::string& kv_cache_type = "f16");
 
+  void ProcessBgrTasks();
+
+  int GetCpuThreads() const;
+
   std::shared_ptr<DatabaseService> db_service_;
   std::shared_ptr<HardwareService> hw_service_;
   std::shared_ptr<DownloadService> download_service_;
@@ -124,4 +128,8 @@ class ModelService {
    */
   std::unordered_map<std::string, std::shared_ptr<ModelMetadata>>
       loaded_model_metadata_map_;
+
+  std::mutex es_mtx_;
+  std::unordered_map<std::string, std::optional<hardware::Estimation>> es_;
+  cortex::TaskQueue& task_queue_;
 };
```
