
Commit 3d85b01

jukofyork authored and arthw committed
Add mistral-v1, mistral-v3, mistral-v3-tekken and mistral-v7 chat template types (ggml-org#10572)
* Templates: `mistral-v1`, `mistral-v2`, `mistral-v3`, `mistral-v3-tekken`
* Changed system message logic and added tests for all 4
* Invalid `system_message` instead of `content` fixed
* Removed tab-indented lines
* Added template code and test for `mistral-v7`
* Added all tests. Fixed bug with `tmpl == "llama2"` test.
* Replaced tabs with spaces.
* Removed `'mistral-v2'` option as no (open) models ever used it
* Removed all references to 'v2' template from comments
* Update llama.cpp: fixed `trim_assistant_message` bug
1 parent 29ade18 commit 3d85b01
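For orientation, the new template names are driven through the existing public llama_chat_apply_template API rather than a new entry point. Below is a minimal sketch of calling it with an explicit template name; the conversation, buffer size, and render_chat helper are illustrative and not part of the commit. A null model handle is allowed when a template name or string is passed explicitly, since no embedded template needs to be looked up.

#include <cstdint>
#include <string>
#include <vector>

#include "llama.h"

// Render a short conversation with an explicitly named template type.
// Passing "mistral-v7", "mistral-v1", "mistral-v3" or "mistral-v3-tekken"
// selects the corresponding new branch in llama_chat_apply_template_internal().
static std::string render_chat(const char * tmpl) {
    std::vector<llama_chat_message> chat = {
        { "system",    "You are a helpful assistant." },
        { "user",      "Hello" },
        { "assistant", "Hi there" },
    };
    std::vector<char> buf(4096); // illustrative size; the call returns the length it needed
    const int32_t n = llama_chat_apply_template(
        /*model=*/nullptr, tmpl, chat.data(), chat.size(),
        /*add_ass=*/false, buf.data(), (int32_t) buf.size());
    return n < 0 ? std::string("<unsupported template>") : std::string(buf.data(), (size_t) n);
}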

File tree

2 files changed: +103 −33 lines changed

src/llama.cpp

Lines changed: 74 additions & 30 deletions
@@ -21871,41 +21871,85 @@ static int32_t llama_chat_apply_template_internal(
         if (add_ass) {
             ss << "<|im_start|>assistant\n";
         }
-    } else if (tmpl == "llama2" || tmpl == "mistral" || tmpl_contains("[INST]")) {
-        // llama2 template and its variants
-        // [variant] support system message
-        bool support_system_message = tmpl_contains("<<SYS>>") || tmpl == "mistral";
-        // [variant] space before + after response
-        bool space_around_response = tmpl_contains("' ' + eos_token");
-        // [variant] add BOS inside history
-        bool add_bos_inside_history = tmpl_contains("bos_token + '[INST]");
-        // [variant] trim spaces from the input message
-        bool strip_message = tmpl_contains("content.strip()");
-        // construct the prompt
-        bool is_inside_turn = true; // skip BOS at the beginning
-        ss << "[INST] ";
-        for (auto message : chat) {
-            std::string content = strip_message ? trim(message->content) : message->content;
-            std::string role(message->role);
-            if (!is_inside_turn) {
-                is_inside_turn = true;
-                ss << (add_bos_inside_history ? "<s>[INST] " : "[INST] ");
+    } else if (tmpl == "llama2" || tmpl.find("mistral") == 0 || tmpl_contains("[INST]")) {
+        if (tmpl == "mistral-v7" || tmpl_contains("[SYSTEM_PROMPT]")) {
+            // Official mistral 'v7' template
+            // See: https://huggingface.co/mistralai/Mistral-Large-Instruct-2411#basic-instruct-template-v7
+            for (auto message : chat) {
+                std::string role(message->role);
+                std::string content(message->content);
+                if (role == "system") {
+                    ss << "[SYSTEM_PROMPT] " << content << "[/SYSTEM_PROMPT]";
+                } else if (role == "user") {
+                    ss << "[INST] " << content << "[/INST]";
+                }
+                else {
+                    ss << " " << content << "</s>";
+                }
             }
-            if (role == "system") {
-                if (support_system_message) {
-                    ss << "<<SYS>>\n" << content << "\n<</SYS>>\n\n";
+        } else if (tmpl == "mistral-v1" || tmpl == "mistral-v3" || tmpl == "mistral-v3-tekken"
+                || tmpl_contains("' [INST] ' + system_message") // catches official 'v1' template
+                || tmpl_contains("[AVAILABLE_TOOLS]")) { // catches official 'v3' and 'v3-tekken' templates
+            // Official mistral 'v1', 'v3' and 'v3-tekken' templates
+            // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md
+            // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md
+            std::string leading_space = (tmpl == "mistral-v1" || tmpl_contains(" [INST]") ? " " : "");
+            std::string trailing_space = (tmpl == "mistral-v3-tekken" || tmpl_contains("\"[INST]\"") ? "" : " ");
+            bool trim_assistant_message = tmpl_contains("|trim + eos_token");
+            bool is_inside_turn = false;
+            for (auto message : chat) {
+                if (!is_inside_turn) {
+                    ss << leading_space << "[INST]" << trailing_space;
+                    is_inside_turn = true;
+                }
+                std::string role(message->role);
+                std::string content(message->content);
+                if (role == "system") {
+                    ss << content << "\n\n";
+                } else if (role == "user") {
+                    ss << content << leading_space << "[/INST]";
                 } else {
-                    // if the model does not support system message, we still include it in the first message, but without <<SYS>>
-                    ss << content << "\n";
+                    ss << trailing_space << (trim_assistant_message ? trim(content) : content) << "</s>";
+                    is_inside_turn = false;
+                }
+            }
+        } else {
+            // llama2 template and its variants
+            // [variant] support system message
+            // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2
+            bool support_system_message = tmpl_contains("<<SYS>>") || tmpl == "llama2";
+            // [variant] space before + after response
+            bool space_around_response = tmpl_contains("' ' + eos_token");
+            // [variant] add BOS inside history
+            bool add_bos_inside_history = tmpl_contains("bos_token + '[INST]");
+            // [variant] trim spaces from the input message
+            bool strip_message = tmpl_contains("content.strip()");
+            // construct the prompt
+            bool is_inside_turn = true; // skip BOS at the beginning
+            ss << "[INST] ";
+            for (auto message : chat) {
+                std::string content = strip_message ? trim(message->content) : message->content;
+                std::string role(message->role);
+                if (!is_inside_turn) {
+                    is_inside_turn = true;
+                    ss << (add_bos_inside_history ? "<s>[INST] " : "[INST] ");
+                }
+                if (role == "system") {
+                    if (support_system_message) {
+                        ss << "<<SYS>>\n" << content << "\n<</SYS>>\n\n";
+                    } else {
+                        // if the model does not support system message, we still include it in the first message, but without <<SYS>>
+                        ss << content << "\n";
+                    }
+                } else if (role == "user") {
+                    ss << content << " [/INST]";
+                } else {
+                    ss << (space_around_response ? " " : "") << content << (space_around_response ? " " : "") << "</s>";
+                    is_inside_turn = false;
                 }
             }
-        } else if (role == "user") {
-            ss << content << " [/INST]";
-        } else {
-            ss << (space_around_response ? " " : "") << content << (space_around_response ? " " : "") << "</s>";
-            is_inside_turn = false;
-        }
+            // llama2 templates seem to not care about "add_generation_prompt
         }
-        // llama2 templates seem to not care about "add_generation_prompt"
     } else if (tmpl == "phi3" || (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>"))) {
         // Phi 3
         for (auto message : chat) {
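To make the new branches concrete, tracing the code above over a three-message chat (system "S", user "U", assistant "A") gives the following rendered prompts. These strings are derived from the diff itself; BOS/EOS tokens added by the tokenizer are out of scope here.

mistral-v1:         " [INST] S\n\nU [/INST] A</s>"
mistral-v3:         "[INST] S\n\nU[/INST] A</s>"
mistral-v3-tekken:  "[INST]S\n\nU[/INST]A</s>"
mistral-v7:         "[SYSTEM_PROMPT] S[/SYSTEM_PROMPT][INST] U[/INST] A</s>"

The v1, v3, and v3-tekken variants differ only in the space before [INST] and the space after it, which is exactly what leading_space and trailing_space encode.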

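When a model's embedded Jinja template is passed instead of an explicit name, the branch is picked by substring markers (tmpl_contains is a substring check over the template source): [SYSTEM_PROMPT] selects the v7 path, ' [INST] ' + system_message the v1 path, [AVAILABLE_TOOLS] the v3 and v3-tekken paths, and any other template containing [INST] falls through to the llama2 variants. A spot-check in the spirit of the tests this commit adds, reusing the hypothetical render_chat() sketch above (illustrative only, not the commit's actual test code):

#include <cassert>

int main() {
    // Expected output traced from the mistral-v7 branch in the diff above.
    const std::string out = render_chat("mistral-v7");
    assert(out == "[SYSTEM_PROMPT] You are a helpful assistant.[/SYSTEM_PROMPT]"
                  "[INST] Hello[/INST] Hi there</s>");
    return 0;
}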