
Commit f49bd78

Merge pull request #122 from janhq/unload-model-stop-background
Unload model stop background
2 parents a438284 + d39da8e

2 files changed (+39 −2 lines)

controllers/llamaCPP.cc (32 additions, 1 deletion)
@@ -196,6 +196,25 @@ void llamaCPP::embedding(
   return;
 }
 
+void llamaCPP::unloadModel(
+    const HttpRequestPtr &req,
+    std::function<void(const HttpResponsePtr &)> &&callback) {
+  Json::Value jsonResp;
+  jsonResp["message"] = "No model loaded";
+  if (model_loaded) {
+    stopBackgroundTask();
+
+    llama_free(llama.ctx);
+    llama_free_model(llama.model);
+    llama.ctx = nullptr;
+    llama.model = nullptr;
+    jsonResp["message"] = "Model unloaded successfully";
+  }
+  auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
+  callback(resp);
+  return;
+}
+
 void llamaCPP::loadModel(
     const HttpRequestPtr &req,
     std::function<void(const HttpResponsePtr &)> &&callback) {
@@ -274,7 +293,19 @@ void llamaCPP::loadModel(
 
 void llamaCPP::backgroundTask() {
   while (model_loaded) {
-    model_loaded = llama.update_slots();
+    // model_loaded =
+    llama.update_slots();
   }
+  LOG_INFO << "Background task stopped!";
   return;
 }
+
+void llamaCPP::stopBackgroundTask() {
+  if (model_loaded) {
+    model_loaded = false;
+    LOG_INFO << "changed to false";
+    if (backgroundThread.joinable()) {
+      backgroundThread.join();
+    }
+  }
+}
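
The key point of the .cc change is the shutdown ordering: unloadModel() calls stopBackgroundTask(), which clears the model_loaded flag and joins backgroundThread, and only then frees the llama context and model, so update_slots() can no longer touch freed state. Below is a minimal, self-contained sketch of that stop-and-join pattern; it is not the project's code, and the sleep merely stands in for llama.update_slots().

// Sketch of the stop-and-join pattern this commit uses (illustrative only).
#include <atomic>
#include <chrono>
#include <iostream>
#include <thread>

std::atomic<bool> model_loaded{false};
std::thread backgroundThread;

void backgroundTask() {
  while (model_loaded) {
    // Stand-in for llama.update_slots(): one unit of background work.
    std::this_thread::sleep_for(std::chrono::milliseconds(10));
  }
  std::cout << "Background task stopped!" << std::endl;
}

void stopBackgroundTask() {
  if (model_loaded) {
    model_loaded = false;              // signal the loop to exit
    if (backgroundThread.joinable()) {
      backgroundThread.join();         // wait for the worker to finish
    }
  }
}

int main() {
  model_loaded = true;
  backgroundThread = std::thread(backgroundTask);
  std::this_thread::sleep_for(std::chrono::milliseconds(50));
  stopBackgroundTask();
  // Only after join() returns is it safe to free the model and context,
  // which is the ordering unloadModel() relies on.
  return 0;
}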

controllers/llamaCPP.h (7 additions, 1 deletion)
@@ -2124,6 +2124,8 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {
   METHOD_ADD(llamaCPP::chatCompletion, "chat_completion", Post);
   METHOD_ADD(llamaCPP::embedding, "embedding", Post);
   METHOD_ADD(llamaCPP::loadModel, "loadmodel", Post);
+  METHOD_ADD(llamaCPP::unloadModel, "unloadmodel", Get);
+
   // PATH_ADD("/llama/chat_completion", Post);
   METHOD_LIST_END
   void chatCompletion(const HttpRequestPtr &req,
@@ -2132,13 +2134,17 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {
                       std::function<void(const HttpResponsePtr &)> &&callback);
   void loadModel(const HttpRequestPtr &req,
                  std::function<void(const HttpResponsePtr &)> &&callback);
+  void unloadModel(const HttpRequestPtr &req,
+                   std::function<void(const HttpResponsePtr &)> &&callback);
   void warmupModel();
 
   void backgroundTask();
 
+  void stopBackgroundTask();
+
 private:
   llama_server_context llama;
-  bool model_loaded = false;
+  std::atomic<bool> model_loaded = false;
   size_t sent_count = 0;
   size_t sent_token_probs_index = 0;
   std::thread backgroundThread;
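
The header change declares the new handler, registers it under the "unloadmodel" path as a GET route, and turns the shared model_loaded flag into std::atomic<bool>, presumably because it is now written from the HTTP handler thread while the background thread reads it. As a rough, hypothetical illustration of this kind of Drogon route registration (not nitro's code; the controller name, port, and response body are invented):

// Standalone Drogon controller sketch, for illustration only.
#include <drogon/drogon.h>
#include <drogon/HttpController.h>
#include <functional>
#include <json/value.h>

class demoCtrl : public drogon::HttpController<demoCtrl> {
 public:
  METHOD_LIST_BEGIN
  // Wires a GET route to a handler, as the diff does for "unloadmodel".
  METHOD_ADD(demoCtrl::unload, "unload", drogon::Get);
  METHOD_LIST_END

  void unload(const drogon::HttpRequestPtr &req,
              std::function<void(const drogon::HttpResponsePtr &)> &&callback) {
    (void)req;
    Json::Value body;
    body["message"] = "Model unloaded successfully";
    callback(drogon::HttpResponse::newHttpJsonResponse(body));
  }
};

int main() {
  // Drogon prefixes the controller's class name to the paths it serves.
  drogon::app().addListener("0.0.0.0", 8080).run();
  return 0;
}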

0 commit comments
