@@ -96,18 +96,19 @@ std::string create_return_json(const std::string &id, const std::string &model,
 }
 
 void llamaCPP::warmupModel() {
-  // json pseudo;
-  //
-  // pseudo["prompt"] = "Hello";
-  // pseudo["n_predict"] = 10;
-  // const int task_id = llama.request_completion(pseudo, false);
-  // std::string completion_text;
-  // task_result result = llama.next_result(task_id);
-  // if (!result.error && result.stop) {
-  //   LOG_INFO << result.result_json.dump(-1, ' ', false,
-  //   json::error_handler_t::replace);
-  // }
-  // return;
+  json pseudo;
+
+  pseudo["prompt"] = "Hello";
+  pseudo["n_predict"] = 10;
+  pseudo["stream"] = false;
+  const int task_id = llama.request_completion(pseudo, false, false);
+  std::string completion_text;
+  task_result result = llama.next_result(task_id);
+  if (!result.error && result.stop) {
+    LOG_INFO << result.result_json.dump(-1, ' ', false,
+                                        json::error_handler_t::replace);
+  }
+  return;
 }
 
 void llamaCPP::chatCompletion(
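For context on the payload this hunk enables: `json` here appears to be nlohmann::json (the type used by llama.cpp's server example), and `dump(-1, ' ', false, json::error_handler_t::replace)` serializes the result compactly while replacing invalid UTF-8 instead of throwing. Below is a minimal, standalone sketch of just the payload construction and logging step; it does not touch the nitro task queue, and the `main` wrapper is only for illustration.

```cpp
// Sketch of the warmup payload, assuming `json` is nlohmann::json.
#include <iostream>
#include <nlohmann/json.hpp>

using json = nlohmann::json;

int main() {
  json pseudo;
  pseudo["prompt"] = "Hello";   // short prompt, just enough to exercise the model
  pseudo["n_predict"] = 10;     // a handful of tokens keeps the warmup cheap
  pseudo["stream"] = false;     // warmup wants a single blocking result

  // dump(-1, ' ', false, replace): compact output, no ASCII escaping,
  // and invalid UTF-8 bytes are replaced rather than raising an exception.
  std::cout << pseudo.dump(-1, ' ', false, json::error_handler_t::replace)
            << std::endl;
  return 0;
}
```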
@@ -365,10 +366,11 @@ void llamaCPP::loadModel(
   jsonResp["message"] = "Model loaded successfully";
   model_loaded = true;
   auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
-  // warmupModel();
 
   LOG_INFO << "Started background task here!";
   backgroundThread = std::thread(&llamaCPP::backgroundTask, this);
+  warmupModel();
+
   callback(resp);
 }
 
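The second hunk moves the warmup call to after `backgroundThread` is launched. The ordering matters: `llama.next_result(task_id)` blocks until the background loop has processed the queued completion, so calling `warmupModel()` before that thread exists would leave `loadModel` waiting indefinitely. The following self-contained sketch illustrates that pattern with hypothetical names (a plain queue plus condition variable, not the actual nitro/llama.cpp types).

```cpp
// Why the blocking warmup must run after the background loop starts:
// the caller waits on a result only the background thread can produce.
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <queue>
#include <string>
#include <thread>

struct TaskQueue {
  std::mutex m;
  std::condition_variable cv;
  std::queue<std::string> pending;
  std::queue<std::string> done;
  bool stop = false;

  void submit(std::string prompt) {
    std::lock_guard<std::mutex> lk(m);
    pending.push(std::move(prompt));
    cv.notify_all();
  }

  // Blocks until the background loop publishes a result -- the analogue of
  // llama.next_result(task_id) in warmupModel().
  std::string next_result() {
    std::unique_lock<std::mutex> lk(m);
    cv.wait(lk, [&] { return !done.empty(); });
    std::string r = std::move(done.front());
    done.pop();
    return r;
  }

  // The analogue of backgroundTask(): drains pending work, publishes results.
  void background_loop() {
    std::unique_lock<std::mutex> lk(m);
    while (!stop) {
      cv.wait(lk, [&] { return stop || !pending.empty(); });
      while (!pending.empty()) {
        std::string p = std::move(pending.front());
        pending.pop();
        done.push("completed: " + p);  // stand-in for running inference
        cv.notify_all();
      }
    }
  }

  void shutdown() {
    std::lock_guard<std::mutex> lk(m);
    stop = true;
    cv.notify_all();
  }
};

int main() {
  TaskQueue q;
  std::thread background(&TaskQueue::background_loop, &q);  // start loop first

  q.submit("Hello");                     // warmup request
  std::cout << q.next_result() << "\n";  // would block forever without the thread

  q.shutdown();
  background.join();
  return 0;
}
```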