
Commit fc49eab

feat: add warmup model back
1 parent e08e7d0 · commit fc49eab

1 file changed (+15 −13 lines)


controllers/llamaCPP.cc

@@ -96,18 +96,19 @@ std::string create_return_json(const std::string &id, const std::string &model,
 }
 
 void llamaCPP::warmupModel() {
-  // json pseudo;
-  //
-  // pseudo["prompt"] = "Hello";
-  // pseudo["n_predict"] = 10;
-  // const int task_id = llama.request_completion(pseudo, false);
-  // std::string completion_text;
-  // task_result result = llama.next_result(task_id);
-  // if (!result.error && result.stop) {
-  //   LOG_INFO << result.result_json.dump(-1, ' ', false,
-  //                                       json::error_handler_t::replace);
-  // }
-  // return;
+  json pseudo;
+
+  pseudo["prompt"] = "Hello";
+  pseudo["n_predict"] = 10;
+  pseudo["stream"] = false;
+  const int task_id = llama.request_completion(pseudo, false, false);
+  std::string completion_text;
+  task_result result = llama.next_result(task_id);
+  if (!result.error && result.stop) {
+    LOG_INFO << result.result_json.dump(-1, ' ', false,
+                                        json::error_handler_t::replace);
+  }
+  return;
 }
 
 void llamaCPP::chatCompletion(
@@ -365,10 +366,11 @@ void llamaCPP::loadModel(
   jsonResp["message"] = "Model loaded successfully";
   model_loaded = true;
   auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
-  // warmupModel();
 
   LOG_INFO << "Started background task here!";
   backgroundThread = std::thread(&llamaCPP::backgroundTask, this);
+  warmupModel();
+
   callback(resp);
 }
 