
Commit 39284a6

Merge pull request #112 from menloresearch/update-dev-from-master-2025-06-02-00-09
Sync master with upstream release b5572
2 parents 034d0a8 + 7675c55 commit 39284a6

26 files changed, +3823 -3721 lines

README.md

Lines changed: 1 addition & 0 deletions

@@ -130,6 +130,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
 <details>
 <summary>Bindings</summary>
 
+- Python: [ddh0/easy-llama](https://github.com/ddh0/easy-llama)
 - Python: [abetlen/llama-cpp-python](https://github.com/abetlen/llama-cpp-python)
 - Go: [go-skynet/go-llama.cpp](https://github.com/go-skynet/go-llama.cpp)
 - Node.js: [withcatai/node-llama-cpp](https://github.com/withcatai/node-llama-cpp)

convert_hf_to_gguf.py

Lines changed: 27 additions & 28 deletions

@@ -3814,7 +3814,7 @@ def _xlmroberta_set_vocab(self) -> None:
             remove_whitespaces = tokenizer.clean_up_tokenization_spaces
             precompiled_charsmap = b64decode(tokenizer_json["normalizer"]["precompiled_charsmap"])
 
-            vocab_size = self.hparams.get("vocab_size", tokenizer.vocab_size)
+            vocab_size = max(self.hparams.get("vocab_size", 0), tokenizer.vocab_size)
         else:
             sentencepiece_model = model.ModelProto()  # pyright: ignore[reportAttributeAccessIssue]
             sentencepiece_model.ParseFromString(open(tokenizer_path, "rb").read())
@@ -3827,7 +3827,7 @@ def _xlmroberta_set_vocab(self) -> None:
             tokenizer = SentencePieceProcessor()
             tokenizer.LoadFromFile(str(tokenizer_path))
 
-            vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
+            vocab_size = max(self.hparams.get("vocab_size", 0), tokenizer.vocab_size())
 
         tokens: list[bytes] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)]
         scores: list[float] = [-10000.0] * vocab_size
@@ -3857,33 +3857,26 @@ def _xlmroberta_set_vocab(self) -> None:
             unk_token = tokenizer_config_json.get("unk_token")
             unk_token_id = added_vocab.get(unk_token, tokenizer_json["model"].get("unk_id", 3))
 
-            for token_id in range(vocab_size):
+            for token_id in range(tokenizer.vocab_size):
                 piece = tokenizer._convert_id_to_token(token_id)
-                text = piece.encode("utf-8")
-                score = tokenizer_json["model"]["vocab"][token_id][1]
-
-                toktype = SentencePieceTokenTypes.NORMAL
-                if token_id == unk_token_id:
-                    toktype = SentencePieceTokenTypes.UNKNOWN
-                elif token_id in tokenizer.all_special_ids:
-                    toktype = SentencePieceTokenTypes.CONTROL
-                elif token_id in added_vocab.values():
-                    toktype = SentencePieceTokenTypes.USER_DEFINED
-                # No reliable way to detect this, but jina doesn't have any
-                # elif tokenizer.IsByte(token_id):
-                #     toktype = SentencePieceTokenTypes.BYTE
-
-                tokens[token_id] = text
-                scores[token_id] = score
-                toktypes[token_id] = toktype
-
-            if vocab_size > len(tokens):
-                pad_count = vocab_size - len(tokens)
-                logger.debug(f"Padding vocab with {pad_count} token(s) - [PAD1] through [PAD{pad_count}]")
-                for i in range(1, pad_count + 1):
-                    tokens.append(bytes(f"[PAD{i}]", encoding="utf-8"))
-                    scores.append(-1000.0)
-                    toktypes.append(SentencePieceTokenTypes.UNUSED)
+                if (piece := tokenizer._convert_id_to_token(token_id)) is not None:
+                    text = piece.encode("utf-8")
+                    score = tokenizer_json["model"]["vocab"][token_id][1]
+
+                    toktype = SentencePieceTokenTypes.NORMAL
+                    if token_id == unk_token_id:
+                        toktype = SentencePieceTokenTypes.UNKNOWN
+                    elif token_id in tokenizer.all_special_ids:
+                        toktype = SentencePieceTokenTypes.CONTROL
+                    elif token_id in added_vocab.values():
+                        toktype = SentencePieceTokenTypes.USER_DEFINED
+                    # No reliable way to detect this, but jina doesn't have any
+                    # elif tokenizer.IsByte(token_id):
+                    #     toktype = SentencePieceTokenTypes.BYTE
+
+                    tokens[token_id] = text
+                    scores[token_id] = score
+                    toktypes[token_id] = toktype
 
         if isinstance(tokenizer, SentencePieceProcessor):
             # realign tokens (see HF tokenizer code)
@@ -3896,6 +3889,12 @@ def _xlmroberta_set_vocab(self) -> None:
                 SentencePieceTokenTypes.UNKNOWN,
             ] + toktypes[3:-1]
 
+        if self.model_arch == gguf.MODEL_ARCH.NOMIC_BERT_MOE:
+            # Add mask token missing from sentencepiece.bpe.model
+            tokens[250001] = b'<mask>'
+            scores[250001] = 0.0
+            toktypes[250001] = SentencePieceTokenTypes.CONTROL
+
         self.gguf_writer.add_tokenizer_model("t5")
         self.gguf_writer.add_tokenizer_pre("default")
         self.gguf_writer.add_token_list(tokens)

examples/parallel/parallel.cpp

Lines changed: 2 additions & 2 deletions

@@ -158,7 +158,7 @@ int main(int argc, char ** argv) {
     common_params params;
 
     params.n_predict = 128;
-    params.n_junk = 0;
+    params.n_junk = 1;
 
     if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_PARALLEL)) {
         return 1;
@@ -182,7 +182,7 @@
     const bool is_sp_shared = params.is_pp_shared;
 
     // extra text to insert in each client's prompt in order to make it larger
-    const int32_t n_junk = params.n_junk;
+    const int32_t n_junk = std::max(1, params.n_junk);
 
     // init llama.cpp
     llama_backend_init();
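
The new default of 1, together with the std::max clamp at the point of use, means the junk count can no longer reach zero even if 0 is still passed on the command line. A minimal standalone sketch of the effect (the prompt-assembly helper below is hypothetical and not code from the example):

#include <algorithm>
#include <string>

// Pads a client's base prompt with n_junk copies of filler text.
// The clamp mirrors the change in parallel.cpp: at least one junk
// passage is always inserted, so the prompt is guaranteed to grow.
static std::string build_client_prompt(const std::string & base, const std::string & junk, int n_junk_param) {
    const int n_junk = std::max(1, n_junk_param);
    std::string prompt = base;
    for (int i = 0; i < n_junk; ++i) {
        prompt += "\n" + junk;
    }
    return prompt;
}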

ggml/include/ggml.h

Lines changed: 0 additions & 3 deletions

@@ -2095,9 +2095,6 @@ extern "C" {
     GGML_API struct ggml_tensor * ggml_graph_get_grad (const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
     GGML_API struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
 
-    GGML_API void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
-    GGML_API struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
-
     // print info and performance information for the graph
     GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph);

ggml/src/CMakeLists.txt

Lines changed: 2 additions & 0 deletions

@@ -196,6 +196,7 @@ add_library(ggml-base
             ../include/ggml-opt.h
             ../include/gguf.h
             ggml.c
+            ggml.cpp
             ggml-alloc.c
             ggml-backend.cpp
             ggml-opt.cpp
@@ -226,6 +227,7 @@ function(ggml_add_backend_library backend)
         set_target_properties(${backend} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
         target_compile_definitions(${backend} PRIVATE GGML_BACKEND_DL)
         add_dependencies(ggml ${backend})
+        install(TARGETS ${backend} LIBRARY DESTINATION ${CMAKE_INSTALL_BINDIR})
     else()
         add_library(${backend} ${ARGN})
         target_link_libraries(ggml PUBLIC ${backend})

ggml/src/ggml-blas/CMakeLists.txt

Lines changed: 3 additions & 3 deletions

@@ -81,7 +81,7 @@ if (BLAS_FOUND)
     target_link_libraries (ggml-blas PRIVATE ${BLAS_LIBRARIES})
     target_include_directories(ggml-blas PRIVATE ${BLAS_INCLUDE_DIRS})
 else()
-    message(ERROR "BLAS not found, please refer to "
-                  "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
-                  " to set correct GGML_BLAS_VENDOR")
+    message(FATAL_ERROR "BLAS not found, please refer to "
+                        "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
+                        " to set correct GGML_BLAS_VENDOR")
 endif()

ggml/src/ggml-impl.h

Lines changed: 2 additions & 0 deletions

@@ -32,6 +32,8 @@
 extern "C" {
 #endif
 
+void ggml_print_backtrace(void);
+
 #ifndef MIN
 #    define MIN(a, b) ((a) < (b) ? (a) : (b))
 #endif

ggml/src/ggml-sycl/CMakeLists.txt

Lines changed: 2 additions & 2 deletions

@@ -13,7 +13,7 @@ elseif(SUPPORTS_SYCL)
     If you expected the oneAPI Release compiler, please install oneAPI & source it, like:
     source /opt/intel/oneapi/setvars.sh")
 else()
-    message(FATAL_ERROR, "C++ compiler lacks SYCL support.")
+    message(FATAL_ERROR "C++ compiler lacks SYCL support.")
 endif()
 message(STATUS "SYCL found")
 #todo: AOT
@@ -170,7 +170,7 @@
         target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_NVIDIA)
     elseif (GGML_SYCL_TARGET STREQUAL "AMD")
         if (NOT GGML_SYCL_DEVICE_ARCH)
-            message(ERROR "Can't enable SYCL hip backend, GGML_SYCL_DEVICE_ARCH has not been set.")
+            message(FATAL_ERROR "Can't enable SYCL hip backend, GGML_SYCL_DEVICE_ARCH has not been set.")
         endif()
         target_link_libraries(ggml-sycl PRIVATE ONEMATH::onemath_blas_rocblas)
         target_compile_options(ggml-sycl PRIVATE "-fsycl-targets=amdgcn-amd-amdhsa")

ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 1 addition & 1 deletion

@@ -1652,7 +1652,7 @@ static std::array<uint32_t, 2> fa_rows_cols(FaCodePath path, uint32_t D, uint32_
         return {64, 32};
     }
     return {64, 64};
-};
+}
 
 static bool ggml_vk_matmul_shmem_support(const vk_device& device, const std::vector<uint32_t>& warptile, bool mul_mat_id, ggml_type src0_type) {
 

ggml/src/ggml.c

Lines changed: 9 additions & 2 deletions

@@ -133,7 +133,7 @@ static void ggml_print_backtrace_symbols(void) {
 }
 #endif
 
-static void ggml_print_backtrace(void) {
+void ggml_print_backtrace(void) {
     const char * GGML_NO_BACKTRACE = getenv("GGML_NO_BACKTRACE");
     if (GGML_NO_BACKTRACE) {
         return;
@@ -160,13 +160,18 @@ static void ggml_print_backtrace(void) {
     const int parent_pid = getpid();
     const int child_pid = fork();
     if (child_pid < 0) { // error
+#if defined(__linux__)
+        close(lock[1]);
+        close(lock[0]);
+#endif
         return;
     } else if (child_pid == 0) { // child
         char attach[32];
         snprintf(attach, sizeof(attach), "attach %d", parent_pid);
 #if defined(__linux__)
         close(lock[1]);
         (void) !read(lock[0], lock, 1);
+        close(lock[0]);
 #endif
         // try gdb
         execlp("gdb", "gdb", "--batch",
@@ -195,7 +200,7 @@ static void ggml_print_backtrace(void) {
         }
     }
 #else
-static void ggml_print_backtrace(void) {
+void ggml_print_backtrace(void) {
     // platform not supported
 }
 #endif
@@ -216,6 +221,8 @@ void ggml_abort(const char * file, int line, const char * fmt, ...) {
     abort();
 }
 
+// ggml_print_backtrace is registered with std::set_terminate by ggml.cpp
+
 //
 // logging
 //
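
The comment added at the end of this hunk notes that ggml_print_backtrace, now exported through ggml-impl.h, is registered with std::set_terminate by the new ggml.cpp translation unit added to CMakeLists.txt above; that file itself is not part of this diff. A minimal sketch of what such a registration could look like, offered only as an assumption about its contents:

#include <cstdlib>
#include <exception>

extern "C" void ggml_print_backtrace(void); // declared in ggml-impl.h by this commit

// Install a terminate handler at static-initialization time: on an uncaught
// C++ exception, print a backtrace first, then abort as usual.
static const std::terminate_handler ggml_previous_terminate = std::set_terminate([]() {
    ggml_print_backtrace();
    std::abort();
});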
