 #include "sampling.h"
 #include "llama.h"
 #include "chat.h"
+#include "chat-memory/chat_memory.h"
 
 #include <cstdio>
 #include <cstring>
@@ -276,14 +277,20 @@ int main(int argc, char ** argv) {
         return formatted;
     };
 
+    // chat memory
+    auto & chat_memory = get_or_create_chat_memory("default");
+
     std::string prompt;
     {
+        std::string memory_prompt = chat_memory.format_injection_prompt();
         if (params.conversation_mode && params.enable_chat_template) {
             if (!params.system_prompt.empty()) {
                 // format the system prompt (will use template default if empty)
                 chat_add_and_format("system", params.system_prompt);
             }
 
+            // We'll add this back into the system prompt in llama-chat.cpp
+            chat_add_and_format("system", memory_prompt);
             if (!params.prompt.empty()) {
                 // format and append the user prompt
                 chat_add_and_format("user", params.prompt);
@@ -300,7 +307,7 @@ int main(int argc, char ** argv) {
             }
         } else {
             // otherwise use the prompt as is
-            prompt = params.prompt;
+            prompt = memory_prompt + "\n\n" + params.prompt;
         }
 
         if (params.interactive_first || !prompt.empty() || session_tokens.empty()) {
@@ -909,6 +916,8 @@ int main(int argc, char ** argv) {
                 output_tokens.push_back(token);
                 output_ss << common_token_to_piece(ctx, token);
             }
+            // parse and execute any chat-memory commands in the generated output
+            chat_memory.parse_and_execute_command(output_ss.str());
 
             // reset assistant message
             assistant_ss.str("");
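
The patch leans on a chat-memory API that the diff itself does not show. Inferring from the three call sites above (get_or_create_chat_memory, format_injection_prompt, parse_and_execute_command), here is a minimal, hypothetical sketch of what chat-memory/chat_memory.h could declare. The session keying, the "!memory set <key> <value>" command syntax, and the in-process storage are assumptions made for illustration, not the PR's actual implementation.

// chat_memory_sketch.h — hypothetical interface inferred from the call sites
// in this diff; the real chat-memory/chat_memory.h may differ.
#pragma once

#include <map>
#include <sstream>
#include <string>

class ChatMemory {
public:
    // Render stored key/value memories as a prompt fragment suitable for
    // injection into the system prompt.
    std::string format_injection_prompt() const {
        std::ostringstream out;
        out << "Known facts from previous turns:\n";
        for (const auto & kv : entries) {
            out << "- " << kv.first << ": " << kv.second << "\n";
        }
        return out.str();
    }

    // Scan model output for a memory command and apply it. The command
    // syntax here ("!memory set <key> <value>") is invented for this sketch.
    void parse_and_execute_command(const std::string & output) {
        const std::string tag = "!memory set ";
        const auto pos = output.find(tag);
        if (pos == std::string::npos) {
            return;
        }
        std::istringstream rest(output.substr(pos + tag.size()));
        std::string key, value;
        rest >> key;
        std::getline(rest, value);
        if (!value.empty() && value[0] == ' ') {
            value.erase(0, 1); // drop the separator space left by getline
        }
        if (!key.empty()) {
            entries[key] = value;
        }
    }

private:
    std::map<std::string, std::string> entries;
};

// Return the memory instance for a session id, creating it on first use.
inline ChatMemory & get_or_create_chat_memory(const std::string & id) {
    static std::map<std::string, ChatMemory> sessions;
    return sessions[id];
}

Under these assumptions the call sites in the diff compile as written: the injection prompt is folded into the system prompt (or prepended to a raw prompt) before generation, and each completed assistant message is scanned once for memory commands after it is flushed to output_ss.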