
Commit 2fba9d8

Merge branch 'main' into fix-deprecated
2 parents: 1f3096a + 4dc2609

File tree

1 file changed: +11, -16 lines


llama_cpp/llama_cpp.py

Lines changed: 11 additions & 16 deletions
@@ -159,7 +159,7 @@
 
 # struct llama_vocab;
 llama_vocab_p = NewType("llama_vocab_p", int)
-llama_vocab_p_ctypes = ctypes.c_int32
+llama_vocab_p_ctypes = ctypes.c_void_p
 
 # # struct llama_sampler;
 # llama_sampler_p = NewType("llama_sampler_p", int)
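
The type change here is the substance of the fix: llama_vocab is an opaque C struct, so the Python side only ever holds a pointer to it, and declaring that handle as ctypes.c_int32 would truncate the address on 64-bit builds. A minimal sketch of the same idea, using libc's malloc as a stand-in for any C call that returns an opaque pointer (the library and names below are illustrative, not part of this commit):

import ctypes

# Illustrative only: malloc stands in for a C function that returns an
# opaque pointer, the way llama.cpp returns a `const struct llama_vocab *`.
libc = ctypes.CDLL(None)                  # POSIX: reuse the already-loaded libc
libc.malloc.restype = ctypes.c_void_p     # pointer-sized result, not truncated
libc.malloc.argtypes = [ctypes.c_size_t]
libc.free.argtypes = [ctypes.c_void_p]    # opaque handles go back in as c_void_p

handle = libc.malloc(64)                  # a plain Python int holding the address
print(hex(handle))
libc.free(handle)

With the result type left at a 32-bit integer, the same call could return a silently truncated address, which is exactly the failure mode the c_void_p alias avoids.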
@@ -244,7 +244,7 @@
 LLAMA_VOCAB_PRE_TYPE_DBRX = 13
 LLAMA_VOCAB_PRE_TYPE_SMAUG = 14
 LLAMA_VOCAB_PRE_TYPE_PORO = 15
-LLAMA_VOCAV_PRE_TYPE_CHATGLM3 = 16
+LLAMA_VOCAB_PRE_TYPE_CHATGLM3 = 16
 LLAMA_VOCAB_PRE_TYPE_CHATGLM4 = 17
 LLAMA_VOCAB_PRE_TYPE_VIKING = 18
 LLAMA_VOCAB_PRE_TYPE_JAIS = 19
@@ -1266,12 +1266,6 @@ def llama_n_seq_max(ctx: llama_context_p, /) -> int:
     ...
 
 
-# LLAMA_API int32_t llama_vocab_n_tokens(const struct llama_vocab * vocab);
-@ctypes_function("llama_vocab_n_tokens", [llama_vocab_p_ctypes], ctypes.c_int32)
-def llama_vocab_n_tokens(vocab: llama_vocab_p, /) -> int:
-    ...
-
-
 # LLAMA_API int32_t llama_model_n_ctx_train(const struct llama_model * model);
 @ctypes_function("llama_model_n_ctx_train", [llama_model_p_ctypes], ctypes.c_int32)
 def llama_model_n_ctx_train(model: llama_model_p, /) -> int:
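
Dropping this earlier copy matters because Python keeps only one module-level binding per name: a later def silently rebinds an earlier one, so two llama_vocab_n_tokens definitions with different restypes leave whichever appears last in effect. A minimal, self-contained illustration (the reused name is only for the demonstration, nothing here is from the commit):

def llama_vocab_n_tokens(vocab):
    # first definition
    return "first"

def llama_vocab_n_tokens(vocab):
    # a later definition with the same name silently replaces the first
    return "second"

print(llama_vocab_n_tokens(None))  # prints "second"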
@@ -1308,7 +1302,7 @@ def llama_pooling_type(ctx: llama_context_p, /) -> int:
     ...
 
 # LLAMA_API const struct llama_vocab * llama_model_get_vocab(const struct llama_model * model);
-@ctypes_function("llama_model_get_vocab", [llama_model_p_ctypes], ctypes.c_int32)
+@ctypes_function("llama_model_get_vocab", [llama_model_p_ctypes], llama_vocab_p)
 def llama_model_get_vocab(model: llama_model_p, /) -> Optional[llama_vocab_p]:
     ...
 
@@ -1330,7 +1324,7 @@ def llama_vocab_type(vocab: llama_vocab_p, /) -> int:
 
 
 # LLAMA_API int32_t llama_vocab_n_tokens(const struct llama_vocab * vocab);
-@ctypes_function("llama_vocab_n_tokens", [llama_vocab_p_ctypes], ctypes.c_int)
+@ctypes_function("llama_vocab_n_tokens", [llama_vocab_p_ctypes], ctypes.c_int32)
 def llama_vocab_n_tokens(vocab: llama_vocab_p, /) -> int:
     ...
 
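Together with the llama_model_get_vocab fix above, these bindings let callers fetch the opaque vocab handle from a model and ask for its size. A hedged usage sketch: it assumes `model` is an already-loaded llama_model_p handle obtained from the file's model-loading bindings; none of this helper code is part of the commit.

import llama_cpp.llama_cpp as llama_cpp

def vocab_size(model: llama_cpp.llama_model_p) -> int:
    # Fetch the opaque `const struct llama_vocab *` handle for this model.
    vocab = llama_cpp.llama_model_get_vocab(model)
    if vocab is None:
        raise RuntimeError("model returned no vocab handle")
    # int32 number of tokens in the vocabulary.
    return llama_cpp.llama_vocab_n_tokens(vocab)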

@@ -2948,19 +2942,17 @@ def llama_detokenize(
 # // Chat templates
 # //
 
-
 # /// Apply chat template. Inspired by hf apply_chat_template() on python.
 # /// Both "model" and "custom_template" are optional, but at least one is required. "custom_template" has higher precedence than "model"
 # /// NOTE: This function does not use a jinja parser. It only support a pre-defined list of template. See more: https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template
-# /// @param tmpl A Jinja template to use for this chat. If this is nullptr, the models default chat template will be used instead.
+# /// @param tmpl A Jinja template to use for this chat. If this is nullptr, the model's default chat template will be used instead.
 # /// @param chat Pointer to a list of multiple llama_chat_message
 # /// @param n_msg Number of llama_chat_message in this chat
 # /// @param add_ass Whether to end the prompt with the token(s) that indicate the start of an assistant message.
 # /// @param buf A buffer to hold the output formatted prompt. The recommended alloc size is 2 * (total number of characters of all messages)
 # /// @param length The size of the allocated buffer
 # /// @return The total number of bytes of the formatted prompt. If is it larger than the size of buffer, you may need to re-alloc it and then re-apply the template.
 # LLAMA_API int32_t llama_chat_apply_template(
-#        const struct llama_model * model,
 #                      const char * tmpl,
 # const struct llama_chat_message * chat,
 #                            size_t n_msg,
@@ -2970,19 +2962,22 @@ def llama_detokenize(
 @ctypes_function(
     "llama_chat_apply_template",
     [
-        ctypes.c_void_p,
         ctypes.c_char_p,
         ctypes.POINTER(llama_chat_message),
         ctypes.c_size_t,
+        ctypes.c_bool,
+        ctypes.c_char_p,
+        ctypes.c_int32
     ],
     ctypes.c_int32,
 )
 def llama_chat_apply_template(
-    model: llama_model_p,
     tmpl: bytes,
     chat: CtypesArray[llama_chat_message],
     n_msg: int,
-    /,
+    add_ass: bool,
+    buf: bytes,
+    length: int,
 ) -> int:
     ...
 
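The new binding drops the model argument and exposes the output buffer directly, so the caller owns the grow-and-retry loop described in the doc comment above. A usage sketch under stated assumptions: the "chatml" template name, the message contents, and the initial buffer size are illustrative, and a mutable ctypes char buffer is passed where the binding annotates bytes; nothing below is part of the commit.

import ctypes
import llama_cpp.llama_cpp as llama_cpp

# Two illustrative messages; llama_chat_message carries (role, content) as bytes.
msgs = (llama_cpp.llama_chat_message * 2)(
    llama_cpp.llama_chat_message(b"system", b"You are a helpful assistant."),
    llama_cpp.llama_chat_message(b"user", b"Hello!"),
)

tmpl = b"chatml"                          # assumed to be a template name llama.cpp recognizes
buf = ctypes.create_string_buffer(256)    # recommended size: 2x total characters of all messages
n = llama_cpp.llama_chat_apply_template(tmpl, msgs, len(msgs), True, buf, ctypes.sizeof(buf))
if n > ctypes.sizeof(buf):                # buffer too small: re-alloc and re-apply
    buf = ctypes.create_string_buffer(n)
    n = llama_cpp.llama_chat_apply_template(tmpl, msgs, len(msgs), True, buf, ctypes.sizeof(buf))
if n >= 0:                                # a negative return would mean the template was not handled
    print(buf.raw[:n].decode("utf-8"))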
