
Commit ce1fbe0

fix: context length for model import (#2043)
Co-authored-by: sangjanai <[email protected]>
1 parent: 24cf3e5

File tree (2 files changed, +9 −6 lines):
  engine/config/gguf_parser.cc (+6 −6)
  engine/services/model_service.cc (+3 −0)

engine/config/gguf_parser.cc (+6 −6)
@@ -2,12 +2,12 @@
 #include <cstdint>
 #include <cstring>
 #include <ctime>
+#include <filesystem>
 #include <iostream>
 #include <regex>
 #include <stdexcept>
 #include <string>
 #include <vector>
-#include <filesystem>
 
 #ifdef _WIN32
 #include <io.h>
@@ -70,7 +70,7 @@ void GGUFHandler::OpenFile(const std::string& file_path) {
 
 #else
   file_size_ = std::filesystem::file_size(file_path);
-
+
   int file_descriptor = open(file_path.c_str(), O_RDONLY);
   // Memory-map the file
   data_ = static_cast<uint8_t*>(
@@ -105,7 +105,8 @@ std::pair<std::size_t, std::string> GGUFHandler::ReadString(
   std::memcpy(&length, data_ + offset, sizeof(uint64_t));
 
   if (offset + 8 + length > file_size_) {
-    throw std::runtime_error("GGUF metadata string length exceeds file size.\n");
+    throw std::runtime_error(
+        "GGUF metadata string length exceeds file size.\n");
   }
 
   std::string value(reinterpret_cast<const char*>(data_ + offset + 8), length);
@@ -578,9 +579,8 @@ void GGUFHandler::ModelConfigFromMetadata() {
   model_config_.model = name;
   model_config_.id = name;
   model_config_.version = std::to_string(version);
-  model_config_.max_tokens =
-      std::min<int>(kDefaultMaxContextLength, max_tokens);
-  model_config_.ctx_len = std::min<int>(kDefaultMaxContextLength, max_tokens);
+  model_config_.max_tokens = max_tokens;
+  model_config_.ctx_len = max_tokens;
   model_config_.ngl = ngl;
 }
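Net effect of the gguf_parser.cc change: model import no longer clamps max_tokens and ctx_len to kDefaultMaxContextLength, so the context length read from the GGUF metadata is stored verbatim; capping now happens at start time (see engine/services/model_service.cc below). The ReadString hunk also shows how GGUF metadata strings are parsed out of the memory-mapped file: an 8-byte length prefix followed by raw bytes, bounds-checked against the file size. Below is a minimal self-contained sketch of that read pattern, assuming a mapped buffer `data` of `file_size` bytes (standing in for the parser's data_ and file_size_ members); ReadGgufString and its extra truncation check are illustrative, not the codebase's exact code:

#include <cstdint>
#include <cstring>
#include <stdexcept>
#include <string>
#include <utility>

// Reads one GGUF metadata string at `offset`: an 8-byte length (uint64)
// followed by `length` raw bytes. Returns the next offset and the string.
std::pair<std::size_t, std::string> ReadGgufString(const uint8_t* data,
                                                   std::size_t file_size,
                                                   std::size_t offset) {
  // Illustrative extra guard: make sure the length prefix itself is in range.
  if (offset + sizeof(uint64_t) > file_size) {
    throw std::runtime_error("GGUF metadata truncated before string length.\n");
  }
  uint64_t length = 0;
  std::memcpy(&length, data + offset, sizeof(uint64_t));
  // Reject corrupt files whose declared length would run past the end of
  // the mapping (the same check reformatted in the hunk above).
  if (offset + 8 + length > file_size) {
    throw std::runtime_error("GGUF metadata string length exceeds file size.\n");
  }
  std::string value(reinterpret_cast<const char*>(data + offset + 8), length);
  return {offset + 8 + length, value};
}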

engine/services/model_service.cc (+3 −0)
@@ -951,6 +951,7 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
     json_data["user_prompt"] = mc.user_template;
     json_data["ai_prompt"] = mc.ai_template;
     json_data["ctx_len"] = std::min(kDefautlContextLength, mc.ctx_len);
+    json_data["max_tokens"] = std::min(kDefautlContextLength, mc.ctx_len);
     max_model_context_length = mc.ctx_len;
   } else {
     bypass_stop_check_set_.insert(model_handle);
@@ -975,6 +976,8 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
   if (ctx_len) {
     json_data["ctx_len"] =
         std::min(ctx_len.value(), max_model_context_length);
+    json_data["max_tokens"] =
+        std::min(ctx_len.value(), max_model_context_length);
   }
   CTL_INF(json_data.toStyledString());
   auto may_fallback_res = MayFallbackToCpu(json_data["model_path"].asString(),
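
With the import-time cap removed, StartModel is now the single place where the context length is bounded: the value stored at import is capped at kDefautlContextLength by default, and a user-supplied ctx_len override is capped at the model's own maximum (max_model_context_length); in both branches max_tokens now mirrors ctx_len. A minimal sketch of that selection logic under those assumptions (EffectiveCtxLen is a hypothetical helper, not part of the codebase):

#include <algorithm>
#include <optional>

int EffectiveCtxLen(int model_ctx_len,              // ctx_len stored at import
                    int default_cap,                // kDefautlContextLength
                    std::optional<int> user_ctx_len /* per-start override */) {
  // Default path: cap the model's native context length at the engine default.
  int ctx = std::min(default_cap, model_ctx_len);
  if (user_ctx_len) {
    // Override path: honor the request, but never exceed the model's maximum.
    ctx = std::min(*user_ctx_len, model_ctx_len);
  }
  return ctx;  // written to both "ctx_len" and "max_tokens" in json_data
}

For example, a 128k-context model started without an override gets the engine's default cap, while an explicit override of 32768 is honored because it does not exceed the model's own maximum.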
