@@ -5,6 +5,7 @@
 #include "sampling.h"
 #include "llama.h"
 #include "chat.h"
+#include "chat-memory/chat_memory.h"
 
 #include <cstdio>
 #include <cstring>
@@ -276,14 +277,20 @@ int main(int argc, char ** argv) {
         return formatted;
     };
 
+    // chat memory
+    auto & chat_memory = get_or_create_chat_memory("default");
+
     std::string prompt;
     {
+        std::string memory_prompt = chat_memory.format_injection_prompt();
         if (params.conversation_mode && params.enable_chat_template) {
             if (!params.system_prompt.empty()) {
                 // format the system prompt (will use template default if empty)
                 chat_add_and_format("system", params.system_prompt);
             }
 
+            // We'll add this back into the system prompt in llama-chat.cpp
+            chat_add_and_format("system", memory_prompt);
             if (!params.prompt.empty()) {
                 // format and append the user prompt
                 chat_add_and_format("user", params.prompt);
@@ -300,7 +307,7 @@ int main(int argc, char ** argv) {
             }
         } else {
             // otherwise use the prompt as is
-            prompt = params.prompt;
+            prompt = memory_prompt + "\n\n" + params.prompt;
         }
 
         if (params.interactive_first || !prompt.empty() || session_tokens.empty()) {
@@ -909,6 +916,8 @@ int main(int argc, char ** argv) {
                 output_tokens.push_back(token);
                 output_ss << common_token_to_piece(ctx, token);
             }
+            // inject session memory parsing
+            chat_memory.parse_and_execute_command(output_ss.str());
 
             // reset assistant message
             assistant_ss.str("");
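Note: the diff relies on a chat-memory/chat_memory.h header that is not part of the hunks shown here. The sketch below is only an inferred shape of that interface, reconstructed from the three call sites above (get_or_create_chat_memory, format_injection_prompt, parse_and_execute_command); the names of the class and its members are assumptions, and the actual header may look quite different.

```cpp
// Inferred sketch only -- not the actual chat-memory/chat_memory.h.
#pragma once

#include <string>
#include <unordered_map>

class ChatMemory {
public:
    // Text injected as an extra "system" message (template path) or prepended
    // to the raw prompt (non-template path) in main.cpp above.
    std::string format_injection_prompt() const {
        return memory_text;  // placeholder: real version would describe the memory protocol
    }

    // Scan the assistant's latest output for memory commands and apply them.
    void parse_and_execute_command(const std::string & output) {
        // placeholder: a real implementation would parse structured commands
        // out of `output` and update `memory_text` accordingly
        (void) output;
    }

private:
    std::string memory_text;
};

// One memory instance per session id, created lazily on first use.
inline ChatMemory & get_or_create_chat_memory(const std::string & session_id) {
    static std::unordered_map<std::string, ChatMemory> memories;
    return memories[session_id];
}
```

Keeping the instances in a per-session map would match the "default" session id used in main.cpp: one shared memory per CLI run, looked up lazily on first use.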