
Commit 39284a6

Merge pull request #112 from menloresearch/update-dev-from-master-2025-06-02-00-09
Sync master with upstream release b5572
2 parents 034d0a8 + 7675c55 commit 39284a6

26 files changed, +3823 -3721 lines

README.md

Lines changed: 1 addition & 0 deletions

@@ -130,6 +130,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
 <details>
 <summary>Bindings</summary>
 
+- Python: [ddh0/easy-llama](https://github.com/ddh0/easy-llama)
 - Python: [abetlen/llama-cpp-python](https://github.com/abetlen/llama-cpp-python)
 - Go: [go-skynet/go-llama.cpp](https://github.com/go-skynet/go-llama.cpp)
 - Node.js: [withcatai/node-llama-cpp](https://github.com/withcatai/node-llama-cpp)

convert_hf_to_gguf.py

Lines changed: 27 additions & 28 deletions

@@ -3814,7 +3814,7 @@ def _xlmroberta_set_vocab(self) -> None:
             remove_whitespaces = tokenizer.clean_up_tokenization_spaces
             precompiled_charsmap = b64decode(tokenizer_json["normalizer"]["precompiled_charsmap"])
 
-            vocab_size = self.hparams.get("vocab_size", tokenizer.vocab_size)
+            vocab_size = max(self.hparams.get("vocab_size", 0), tokenizer.vocab_size)
         else:
             sentencepiece_model = model.ModelProto()  # pyright: ignore[reportAttributeAccessIssue]
             sentencepiece_model.ParseFromString(open(tokenizer_path, "rb").read())
@@ -3827,7 +3827,7 @@ def _xlmroberta_set_vocab(self) -> None:
             tokenizer = SentencePieceProcessor()
             tokenizer.LoadFromFile(str(tokenizer_path))
 
-            vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
+            vocab_size = max(self.hparams.get("vocab_size", 0), tokenizer.vocab_size())
 
         tokens: list[bytes] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)]
         scores: list[float] = [-10000.0] * vocab_size
@@ -3857,33 +3857,26 @@ def _xlmroberta_set_vocab(self) -> None:
             unk_token = tokenizer_config_json.get("unk_token")
             unk_token_id = added_vocab.get(unk_token, tokenizer_json["model"].get("unk_id", 3))
 
-            for token_id in range(vocab_size):
+            for token_id in range(tokenizer.vocab_size):
                 piece = tokenizer._convert_id_to_token(token_id)
-                text = piece.encode("utf-8")
-                score = tokenizer_json["model"]["vocab"][token_id][1]
-
-                toktype = SentencePieceTokenTypes.NORMAL
-                if token_id == unk_token_id:
-                    toktype = SentencePieceTokenTypes.UNKNOWN
-                elif token_id in tokenizer.all_special_ids:
-                    toktype = SentencePieceTokenTypes.CONTROL
-                elif token_id in added_vocab.values():
-                    toktype = SentencePieceTokenTypes.USER_DEFINED
-                # No reliable way to detect this, but jina doesn't have any
-                # elif tokenizer.IsByte(token_id):
-                #     toktype = SentencePieceTokenTypes.BYTE
-
-                tokens[token_id] = text
-                scores[token_id] = score
-                toktypes[token_id] = toktype
-
-            if vocab_size > len(tokens):
-                pad_count = vocab_size - len(tokens)
-                logger.debug(f"Padding vocab with {pad_count} token(s) - [PAD1] through [PAD{pad_count}]")
-                for i in range(1, pad_count + 1):
-                    tokens.append(bytes(f"[PAD{i}]", encoding="utf-8"))
-                    scores.append(-1000.0)
-                    toktypes.append(SentencePieceTokenTypes.UNUSED)
+                if (piece := tokenizer._convert_id_to_token(token_id)) is not None:
+                    text = piece.encode("utf-8")
+                    score = tokenizer_json["model"]["vocab"][token_id][1]
+
+                    toktype = SentencePieceTokenTypes.NORMAL
+                    if token_id == unk_token_id:
+                        toktype = SentencePieceTokenTypes.UNKNOWN
+                    elif token_id in tokenizer.all_special_ids:
+                        toktype = SentencePieceTokenTypes.CONTROL
+                    elif token_id in added_vocab.values():
+                        toktype = SentencePieceTokenTypes.USER_DEFINED
+                    # No reliable way to detect this, but jina doesn't have any
+                    # elif tokenizer.IsByte(token_id):
+                    #     toktype = SentencePieceTokenTypes.BYTE
+
+                    tokens[token_id] = text
+                    scores[token_id] = score
+                    toktypes[token_id] = toktype
 
         if isinstance(tokenizer, SentencePieceProcessor):
             # realign tokens (see HF tokenizer code)
@@ -3896,6 +3889,12 @@ def _xlmroberta_set_vocab(self) -> None:
                 SentencePieceTokenTypes.UNKNOWN,
             ] + toktypes[3:-1]
 
+        if self.model_arch == gguf.MODEL_ARCH.NOMIC_BERT_MOE:
+            # Add mask token missing from sentencepiece.bpe.model
+            tokens[250001] = b'<mask>'
+            scores[250001] = 0.0
+            toktypes[250001] = SentencePieceTokenTypes.CONTROL
+
         self.gguf_writer.add_tokenizer_model("t5")
         self.gguf_writer.add_tokenizer_pre("default")
         self.gguf_writer.add_token_list(tokens)

examples/parallel/parallel.cpp

Lines changed: 2 additions & 2 deletions

@@ -158,7 +158,7 @@ int main(int argc, char ** argv) {
     common_params params;
 
     params.n_predict = 128;
-    params.n_junk = 0;
+    params.n_junk = 1;
 
     if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_PARALLEL)) {
         return 1;
@@ -182,7 +182,7 @@
     const bool is_sp_shared = params.is_pp_shared;
 
     // extra text to insert in each client's prompt in order to make it larger
-    const int32_t n_junk = params.n_junk;
+    const int32_t n_junk = std::max(1, params.n_junk);
 
     // init llama.cpp
     llama_backend_init();
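
The new default of 1, together with the std::max clamp at the point of use, means the junk count can no longer reach zero even if 0 is still passed on the command line. A minimal standalone sketch of the effect (the prompt-assembly helper below is hypothetical and not code from the example):

#include <algorithm>
#include <string>

// Pads a client's base prompt with n_junk copies of filler text.
// The clamp mirrors the change in parallel.cpp: at least one junk
// passage is always inserted, so the prompt is guaranteed to grow.
static std::string build_client_prompt(const std::string & base, const std::string & junk, int n_junk_param) {
    const int n_junk = std::max(1, n_junk_param);
    std::string prompt = base;
    for (int i = 0; i < n_junk; ++i) {
        prompt += "\n" + junk;
    }
    return prompt;
}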

ggml/include/ggml.h

Lines changed: 0 additions & 3 deletions

@@ -2095,9 +2095,6 @@ extern "C" {
     GGML_API struct ggml_tensor * ggml_graph_get_grad (const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
     GGML_API struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
 
-    GGML_API void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
-    GGML_API struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
-
     // print info and performance information for the graph
     GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph);

ggml/src/CMakeLists.txt

Lines changed: 2 additions & 0 deletions

@@ -196,6 +196,7 @@ add_library(ggml-base
             ../include/ggml-opt.h
             ../include/gguf.h
             ggml.c
+            ggml.cpp
             ggml-alloc.c
             ggml-backend.cpp
             ggml-opt.cpp
@@ -226,6 +227,7 @@ function(ggml_add_backend_library backend)
         set_target_properties(${backend} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
         target_compile_definitions(${backend} PRIVATE GGML_BACKEND_DL)
         add_dependencies(ggml ${backend})
+        install(TARGETS ${backend} LIBRARY DESTINATION ${CMAKE_INSTALL_BINDIR})
     else()
         add_library(${backend} ${ARGN})
         target_link_libraries(ggml PUBLIC ${backend})

ggml/src/ggml-blas/CMakeLists.txt

Lines changed: 3 additions & 3 deletions

@@ -81,7 +81,7 @@ if (BLAS_FOUND)
     target_link_libraries (ggml-blas PRIVATE ${BLAS_LIBRARIES})
     target_include_directories(ggml-blas PRIVATE ${BLAS_INCLUDE_DIRS})
 else()
-    message(ERROR "BLAS not found, please refer to "
-                  "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
-                  " to set correct GGML_BLAS_VENDOR")
+    message(FATAL_ERROR "BLAS not found, please refer to "
+                        "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
+                        " to set correct GGML_BLAS_VENDOR")
 endif()

ggml/src/ggml-impl.h

Lines changed: 2 additions & 0 deletions

@@ -32,6 +32,8 @@
 extern "C" {
 #endif
 
+void ggml_print_backtrace(void);
+
 #ifndef MIN
 #    define MIN(a, b) ((a) < (b) ? (a) : (b))
 #endif

ggml/src/ggml-sycl/CMakeLists.txt

Lines changed: 2 additions & 2 deletions

@@ -13,7 +13,7 @@ elseif(SUPPORTS_SYCL)
     If you expected the oneAPI Release compiler, please install oneAPI & source it, like:
     source /opt/intel/oneapi/setvars.sh")
 else()
-    message(FATAL_ERROR, "C++ compiler lacks SYCL support.")
+    message(FATAL_ERROR "C++ compiler lacks SYCL support.")
 endif()
 message(STATUS "SYCL found")
 #todo: AOT
@@ -170,7 +170,7 @@
         target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_NVIDIA)
     elseif (GGML_SYCL_TARGET STREQUAL "AMD")
         if (NOT GGML_SYCL_DEVICE_ARCH)
-            message(ERROR "Can't enable SYCL hip backend, GGML_SYCL_DEVICE_ARCH has not been set.")
+            message(FATAL_ERROR "Can't enable SYCL hip backend, GGML_SYCL_DEVICE_ARCH has not been set.")
         endif()
         target_link_libraries(ggml-sycl PRIVATE ONEMATH::onemath_blas_rocblas)
         target_compile_options(ggml-sycl PRIVATE "-fsycl-targets=amdgcn-amd-amdhsa")

ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 1 addition & 1 deletion

@@ -1652,7 +1652,7 @@ static std::array<uint32_t, 2> fa_rows_cols(FaCodePath path, uint32_t D, uint32_
         return {64, 32};
     }
     return {64, 64};
-};
+}
 
 static bool ggml_vk_matmul_shmem_support(const vk_device& device, const std::vector<uint32_t>& warptile, bool mul_mat_id, ggml_type src0_type) {
 

ggml/src/ggml.c

Lines changed: 9 additions & 2 deletions

@@ -133,7 +133,7 @@ static void ggml_print_backtrace_symbols(void) {
 }
 #endif
 
-static void ggml_print_backtrace(void) {
+void ggml_print_backtrace(void) {
     const char * GGML_NO_BACKTRACE = getenv("GGML_NO_BACKTRACE");
     if (GGML_NO_BACKTRACE) {
         return;
@@ -160,13 +160,18 @@ static void ggml_print_backtrace(void) {
     const int parent_pid = getpid();
     const int child_pid = fork();
     if (child_pid < 0) { // error
+#if defined(__linux__)
+        close(lock[1]);
+        close(lock[0]);
+#endif
         return;
     } else if (child_pid == 0) { // child
         char attach[32];
         snprintf(attach, sizeof(attach), "attach %d", parent_pid);
 #if defined(__linux__)
         close(lock[1]);
         (void) !read(lock[0], lock, 1);
+        close(lock[0]);
 #endif
         // try gdb
         execlp("gdb", "gdb", "--batch",
@@ -195,7 +200,7 @@ static void ggml_print_backtrace(void) {
         }
     }
 #else
-static void ggml_print_backtrace(void) {
+void ggml_print_backtrace(void) {
     // platform not supported
 }
 #endif
@@ -216,6 +221,8 @@ void ggml_abort(const char * file, int line, const char * fmt, ...) {
     abort();
 }
 
+// ggml_print_backtrace is registered with std::set_terminate by ggml.cpp
+
 //
 // logging
 //
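
The comment added at the end of this hunk notes that ggml_print_backtrace, now exported through ggml-impl.h, is registered with std::set_terminate by the new ggml.cpp translation unit added to CMakeLists.txt above; that file itself is not part of this diff. A minimal sketch of what such a registration could look like, offered only as an assumption about its contents:

#include <cstdlib>
#include <exception>

extern "C" void ggml_print_backtrace(void); // declared in ggml-impl.h by this commit

// Install a terminate handler at static-initialization time: on an uncaught
// C++ exception, print a backtrace first, then abort as usual.
static const std::terminate_handler ggml_previous_terminate = std::set_terminate([]() {
    ggml_print_backtrace();
    std::abort();
});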
