@@ -21871,41 +21871,84 @@ static int32_t llama_chat_apply_template_internal(
         if (add_ass) {
             ss << "<|im_start|>assistant\n";
         }
-    } else if (tmpl == "llama2" || tmpl == "mistral" || tmpl_contains("[INST]")) {
-        // llama2 template and its variants
-        // [variant] support system message
-        bool support_system_message = tmpl_contains("<<SYS>>") || tmpl == "mistral";
-        // [variant] space before + after response
-        bool space_around_response = tmpl_contains("' ' + eos_token");
-        // [variant] add BOS inside history
-        bool add_bos_inside_history = tmpl_contains("bos_token + '[INST]");
-        // [variant] trim spaces from the input message
-        bool strip_message = tmpl_contains("content.strip()");
-        // construct the prompt
-        bool is_inside_turn = true; // skip BOS at the beginning
-        ss << "[INST] ";
-        for (auto message : chat) {
-            std::string content = strip_message ? trim(message->content) : message->content;
-            std::string role(message->role);
-            if (!is_inside_turn) {
-                is_inside_turn = true;
-                ss << (add_bos_inside_history ? "<s>[INST] " : "[INST] ");
+    } else if (tmpl == "llama2" || tmpl.find("mistral") == 0 || tmpl_contains("[INST]")) {
+        if (tmpl == "mistral-v7" || tmpl_contains("[SYSTEM_PROMPT]")) {
+            // Official mistral 'v7' template
+            // See: https://huggingface.co/mistralai/Mistral-Large-Instruct-2411#basic-instruct-template-v7
+            for (auto message : chat) {
+                std::string role(message->role);
+                std::string content(message->content);
+                if (role == "system") {
+                    ss << "[SYSTEM_PROMPT] " << content << "[/SYSTEM_PROMPT]";
+                } else if (role == "user") {
+                    ss << "[INST] " << content << "[/INST]";
+                } else {
+                    ss << " " << content << "</s>";
+                }
             }
-            if (role == "system") {
-                if (support_system_message) {
-                    ss << "<<SYS>>\n" << content << "\n<</SYS>>\n\n";
+        } else if (tmpl == "mistral-v1" || tmpl == "mistral-v3" || tmpl == "mistral-v3-tekken"
+                || tmpl_contains("' [INST] ' + system_message") // catches official 'v1' template
+                || tmpl_contains("[AVAILABLE_TOOLS]")) { // catches official 'v3' and 'v3-tekken' templates
+            // Official mistral 'v1', 'v3' and 'v3-tekken' templates
+            // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md
+            // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md
+            std::string leading_space = (tmpl == "mistral-v1" || tmpl_contains(" [INST]") ? " " : "");
+            std::string trailing_space = (tmpl == "mistral-v3-tekken" || tmpl_contains("\"[INST]\"") ? "" : " ");
+            bool trim_assistant_message = tmpl_contains("|trim + eos_token");
+            bool is_inside_turn = false;
+            for (auto message : chat) {
+                if (!is_inside_turn) {
+                    ss << leading_space << "[INST]" << trailing_space;
+                    is_inside_turn = true;
+                }
+                std::string role(message->role);
+                std::string content(message->content);
+                if (role == "system") {
+                    ss << content << "\n\n";
+                } else if (role == "user") {
+                    ss << content << leading_space << "[/INST]";
                 } else {
-                // if the model does not support system message, we still include it in the first message, but without <<SYS>>
-                ss << content << "\n";
+                    ss << trailing_space << (trim_assistant_message ? trim(content) : content) << "</s>";
+                    is_inside_turn = false;
+                }
+            }
+        } else {
+            // llama2 template and its variants
+            // [variant] support system message
+            // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2
+            bool support_system_message = tmpl_contains("<<SYS>>") || tmpl == "llama2";
+            // [variant] space before + after response
+            bool space_around_response = tmpl_contains("' ' + eos_token");
+            // [variant] add BOS inside history
+            bool add_bos_inside_history = tmpl_contains("bos_token + '[INST]");
+            // [variant] trim spaces from the input message
+            bool strip_message = tmpl_contains("content.strip()");
+            // construct the prompt
+            bool is_inside_turn = true; // skip BOS at the beginning
+            ss << "[INST] ";
+            for (auto message : chat) {
+                std::string content = strip_message ? trim(message->content) : message->content;
+                std::string role(message->role);
+                if (!is_inside_turn) {
+                    is_inside_turn = true;
+                    ss << (add_bos_inside_history ? "<s>[INST] " : "[INST] ");
+                }
+                if (role == "system") {
+                    if (support_system_message) {
+                        ss << "<<SYS>>\n" << content << "\n<</SYS>>\n\n";
+                    } else {
+                        // if the model does not support system message, we still include it in the first message, but without <<SYS>>
+                        ss << content << "\n";
+                    }
+                } else if (role == "user") {
+                    ss << content << " [/INST]";
+                } else {
+                    ss << (space_around_response ? " " : "") << content << (space_around_response ? " " : "") << "</s>";
+                    is_inside_turn = false;
                 }
-            } else if (role == "user") {
-                ss << content << " [/INST]";
-            } else {
-                ss << (space_around_response ? " " : "") << content << (space_around_response ? " " : "") << "</s>";
-                is_inside_turn = false;
             }
+            // llama2 templates seem to not care about "add_generation_prompt"
         }
-        // llama2 templates seem to not care about "add_generation_prompt"
     } else if (tmpl == "phi3" || (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>"))) {
         // Phi 3
         for (auto message : chat) {
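
For reference, here is a minimal sketch (not part of the patch) of how the new branches can be exercised through the public llama_chat_apply_template() API, which forwards to the internal function above. It assumes the llama.h API as of this change, where the model argument may be null when a template string or short name such as "mistral-v7" is passed explicitly; the sample conversation is invented:

#include <cstdio>
#include <vector>

#include "llama.h"

int main() {
    const llama_chat_message chat[] = {
        {"system", "You are helpful."},
        {"user",   "Hello"},
    };
    const size_t n_msg = sizeof(chat) / sizeof(chat[0]);

    for (const char * tmpl : {"mistral-v1", "mistral-v3", "mistral-v3-tekken", "mistral-v7"}) {
        std::vector<char> buf(1024);
        // null model: the template is taken from `tmpl` instead of the model's metadata
        int32_t res = llama_chat_apply_template(nullptr, tmpl, chat, n_msg, /*add_ass=*/false, buf.data(), buf.size());
        if (res < 0) {
            fprintf(stderr, "template '%s' not supported\n", tmpl);
            continue;
        }
        if ((size_t) res > buf.size()) {
            // a return value larger than the buffer is the required size
            buf.resize(res);
            res = llama_chat_apply_template(nullptr, tmpl, chat, n_msg, false, buf.data(), buf.size());
        }
        printf("=== %s ===\n%.*s\n", tmpl, res, buf.data());
    }
    return 0;
}

With the patch applied, a lone user turn renders as " [INST] Hello [/INST]" under mistral-v1, "[INST] Hello[/INST]" under mistral-v3, and "[INST]Hello[/INST]" under mistral-v3-tekken, which is exactly the spacing difference the leading_space/trailing_space variables encode. A system message is injected at the start of the first [INST] block followed by a blank line, while v7 wraps it in [SYSTEM_PROMPT]...[/SYSTEM_PROMPT] instead. As before, BOS is expected to be added by the tokenizer, not by the template.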