Skip to content

Commit 9f1a50f

Browse files
fix: add default cpu_threads (#1948)
* fix: add default cpu_threads
* fix: use half of the available CPU threads

Co-authored-by: vansangpfiev <[email protected]>
1 parent 510ae28 commit 9f1a50f

File tree

4 files changed

+20
-5
lines changed

4 files changed

+20
-5
lines changed

engine/services/model_service.cc

+9
Original file line numberDiff line numberDiff line change
@@ -945,6 +945,11 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
945945

946946
json_helper::MergeJson(json_data, params_override);
947947

948+
// Set default cpu_threads if it is not configured
949+
if (!json_data.isMember("cpu_threads")) {
950+
json_data["cpu_threads"] = GetCpuThreads();
951+
}
952+
948953
// Set the latest ctx_len
949954
if (ctx_len) {
950955
json_data["ctx_len"] =
@@ -1329,6 +1334,10 @@ ModelService::MayFallbackToCpu(const std::string& model_path, int ngl,
13291334
return warning;
13301335
}
13311336

1337+
int ModelService::GetCpuThreads() const {
1338+
return std::max(std::thread::hardware_concurrency() / 2, 1u);
1339+
}
1340+
13321341
cpp::result<std::shared_ptr<ModelMetadata>, std::string>
13331342
ModelService::GetModelMetadata(const std::string& model_id) const {
13341343
if (model_id.empty()) {

engine/services/model_service.h

+2
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,8 @@ class ModelService {
112112
const std::string& model_path, int ngl, int ctx_len, int n_batch = 2048,
113113
int n_ubatch = 2048, const std::string& kv_cache_type = "f16");
114114

115+
int GetCpuThreads() const;
116+
115117
std::shared_ptr<DatabaseService> db_service_;
116118
std::shared_ptr<HardwareService> hw_service_;
117119
std::shared_ptr<DownloadService> download_service_;

engine/utils/hardware/gguf/gguf_file.h

+8-4
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@
77
#include <filesystem>
88
#include <iostream>
99
#include <memory>
10+
#include <optional>
1011
#include <string>
1112
#include <unordered_set>
1213
#include <variant>
1314
#include <vector>
14-
#include <optional>
1515

1616
#ifdef _WIN32
1717
#include <io.h>
@@ -23,8 +23,8 @@
2323
#endif
2424

2525
#include "ggml.h"
26-
#include "utils/string_utils.h"
2726
#include "utils/logging_utils.h"
27+
#include "utils/string_utils.h"
2828

2929
// #define GGUF_LOG(msg) \
3030
// do { \
@@ -246,11 +246,15 @@ struct GGUFHelper {
246246
file_size = std::filesystem::file_size(file_path);
247247

248248
int fd = open(file_path.c_str(), O_RDONLY);
249+
if (fd == -1) {
250+
CTL_INF("Failed to open file: " << file_path << ", error: " << errno);
251+
return false;
252+
}
249253
// Memory-map the file
250254
data = static_cast<uint8_t*>(
251255
mmap(nullptr, file_size, PROT_READ, MAP_PRIVATE, fd, 0));
252256
if (data == MAP_FAILED) {
253-
perror("Error mapping file");
257+
CTL_INF("Error mapping file");
254258
close(fd);
255259
return false;
256260
}
@@ -482,7 +486,7 @@ struct GGUFFile {
482486
inline std::optional<GGUFFile> ParseGgufFile(const std::string& path) {
483487
GGUFFile gf;
484488
GGUFHelper h;
485-
if(!h.OpenAndMMap(path)) {
489+
if (!h.OpenAndMMap(path)) {
486490
return std::nullopt;
487491
}
488492

engine/utils/hardware/gguf/gguf_file_estimate.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -64,14 +64,14 @@ inline float GetQuantBit(const std::string& kv_cache_t) {
6464

6565
inline std::optional<Estimation> EstimateLLaMACppRun(
6666
const std::string& file_path, const RunConfig& rc) {
67-
Estimation res;
6867
// token_embeddings_size = n_vocab * embedding_length * 2 * quant_bit/16 bytes
6968
//RAM = token_embeddings_size + ((total_ngl-ngl) >=1 ? Output_layer_size + (total_ngl - ngl - 1 ) / (total_ngl-1) * (total_file_size - token_embeddings_size - Output_layer_size) : 0 ) (bytes)
7069

7170
// VRAM = total_file_size - RAM (bytes)
7271
auto gf = ParseGgufFile(file_path);
7372
if (!gf)
7473
return std::nullopt;
74+
Estimation res;
7575
int32_t embedding_length = 0;
7676
int64_t n_vocab = 0;
7777
int32_t num_block = 0;

0 commit comments

Comments
 (0)