
Commit 5a71fb6

Merge pull request #165 from janhq/164-bug-caching-still-not-properly-implemented
hotfix: caching
2 parents: a9a90a1 + d47322d

1 file changed: +4 -1 lines changed

controllers/llamaCPP.cc (+4, -1)
@@ -157,14 +157,17 @@ void llamaCPP::chatCompletion(
   // To set default value

   if (jsonBody) {
+    // Default values to enable auto caching
+    data["cache_prompt"] = true;
+    data["n_keep"] = -1;
+
     data["stream"] = (*jsonBody).get("stream", false).asBool();
     data["n_predict"] = (*jsonBody).get("max_tokens", 500).asInt();
     data["top_p"] = (*jsonBody).get("top_p", 0.95).asFloat();
     data["temperature"] = (*jsonBody).get("temperature", 0.8).asFloat();
     data["frequency_penalty"] =
         (*jsonBody).get("frequency_penalty", 0).asFloat();
     data["presence_penalty"] = (*jsonBody).get("presence_penalty", 0).asFloat();
-    data["cache_prompt"] = true;
     const Json::Value &messages = (*jsonBody)["messages"];
     for (const auto &message : messages) {
       std::string input_role = message["role"].asString();
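
For context, the handler builds the llama.cpp request by merging the client-supplied JSON with server-side defaults via JsonCpp. The sketch below is not part of the commit; it is a minimal, self-contained illustration of that pattern, with the two caching fields now set unconditionally before the client-overridable ones. The main() wrapper and the example request values are hypothetical, and the reading of n_keep = -1 as "retain all prompt tokens when the context overflows" reflects the commonly documented llama.cpp server behavior rather than anything stated in this diff.

#include <json/json.h>
#include <iostream>

int main() {
  // Hypothetical client request body: only max_tokens is supplied.
  Json::Value jsonBody;
  jsonBody["max_tokens"] = 128;

  Json::Value data;

  // Unconditional defaults added by this commit: turn prompt caching on and
  // keep the whole prompt (n_keep = -1) so the cached prefix stays usable.
  data["cache_prompt"] = true;
  data["n_keep"] = -1;

  // Client-overridable defaults, the same JsonCpp get(key, fallback) pattern
  // the handler uses: absent fields fall back, present fields win.
  data["stream"] = jsonBody.get("stream", false).asBool();
  data["n_predict"] = jsonBody.get("max_tokens", 500).asInt();
  data["top_p"] = jsonBody.get("top_p", 0.95).asFloat();
  data["temperature"] = jsonBody.get("temperature", 0.8).asFloat();

  std::cout << data.toStyledString() << std::endl;  // n_predict is 128 here
  return 0;
}

Built against jsoncpp (for example, g++ sketch.cpp -ljsoncpp), this prints a request object where the caching fields are always present while n_predict reflects the client's max_tokens override.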
