From 63c6042118f75e342e614ba010e9a1c88af5fd19 Mon Sep 17 00:00:00 2001 From: yumeyao Date: Wed, 2 Apr 2025 14:17:06 +0800 Subject: [PATCH] Use string_view::find() for the tokenization search to avoid unnecessary lookups beyond the fragment range --- src/llama-vocab.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp index 31e2055f83611..934f3ed0ddf5d 100644 --- a/src/llama-vocab.cpp +++ b/src/llama-vocab.cpp @@ -2220,14 +2220,12 @@ void llama_vocab::impl::tokenizer_st_partition(std::forward_list length - if (match + text.length() > raw_text_base_offset + raw_text_base_length) break; - #ifdef PRETOKENIZERDEBUG LLAMA_LOG_WARN("FF: (%ld %ld %ld) '%s'\n", raw_text->length(), raw_text_base_offset, raw_text_base_length, raw_text->substr(raw_text_base_offset, raw_text_base_length).c_str()); #endif