@@ -27,6 +27,40 @@ std::shared_ptr<State> createState(int task_id, llamaCPP *instance) {
27
27
28
28
// --------------------------------------------
29
29
30
+ std::string create_embedding_payload (const std::vector<float > &embedding,
31
+ int prompt_tokens) {
32
+ Json::Value root;
33
+
34
+ root[" object" ] = " list" ;
35
+
36
+ Json::Value dataArray (Json::arrayValue);
37
+ Json::Value dataItem;
38
+
39
+ dataItem[" object" ] = " embedding" ;
40
+
41
+ Json::Value embeddingArray (Json::arrayValue);
42
+ for (const auto &value : embedding) {
43
+ embeddingArray.append (value);
44
+ }
45
+ dataItem[" embedding" ] = embeddingArray;
46
+ dataItem[" index" ] = 0 ;
47
+
48
+ dataArray.append (dataItem);
49
+ root[" data" ] = dataArray;
50
+
51
+ root[" model" ] = " _" ;
52
+
53
+ Json::Value usage;
54
+ usage[" prompt_tokens" ] = prompt_tokens;
55
+ usage[" total_tokens" ] = prompt_tokens; // Assuming total tokens equals prompt
56
+ // tokens in this context
57
+ root[" usage" ] = usage;
58
+
59
+ Json::StreamWriterBuilder writer;
60
+ writer[" indentation" ] = " " ; // Compact output
61
+ return Json::writeString (writer, root);
62
+ }
63
+
30
64
std::string create_full_return_json (const std::string &id,
31
65
const std::string &model,
32
66
const std::string &content,
@@ -245,17 +279,18 @@ void llamaCPP::embedding(
245
279
const auto &jsonBody = req->getJsonObject ();
246
280
247
281
json prompt;
248
- if (jsonBody->isMember (" content " ) != 0 ) {
249
- prompt = (*jsonBody)[" content " ].asString ();
282
+ if (jsonBody->isMember (" input " ) != 0 ) {
283
+ prompt = (*jsonBody)[" input " ].asString ();
250
284
} else {
251
285
prompt = " " ;
252
286
}
253
287
const int task_id = llama.request_completion (
254
288
{{" prompt" , prompt}, {" n_predict" , 0 }}, false , true );
255
289
task_result result = llama.next_result (task_id);
256
- std::string embeddingResp = result.result_json . dump () ;
290
+ std::vector< float > embedding_result = result.result_json [ " embedding " ] ;
257
291
auto resp = nitro_utils::nitroHttpResponse ();
258
- resp->setBody (embeddingResp);
292
+ std::string embedding_resp = create_embedding_payload (embedding_result, 0 );
293
+ resp->setBody (embedding_resp);
259
294
resp->setContentTypeString (" application/json" );
260
295
callback (resp);
261
296
return ;
@@ -363,7 +398,7 @@ void llamaCPP::loadModel(
363
398
llama.initialize ();
364
399
365
400
Json::Value jsonResp;
366
- jsonResp[" message" ] = " Failed to load model " ;
401
+ jsonResp[" message" ] = " Model loaded successfully " ;
367
402
model_loaded = true ;
368
403
auto resp = nitro_utils::nitroHttpJsonResponse (jsonResp);
369
404
0 commit comments