Skip to content

Commit 6b7d13f

Browse files
Merge branch 'ggml-org:master' into master
2 parents 3cd3d5d + 2d451c8 commit 6b7d13f

File tree

14 files changed

+488
-390
lines changed

14 files changed

+488
-390
lines changed

common/arg.cpp

Lines changed: 71 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,10 @@ struct common_hf_file_res {
162162

163163
#ifdef LLAMA_USE_CURL
164164

165+
bool common_has_curl() {
166+
return true;
167+
}
168+
165169
#ifdef __linux__
166170
#include <linux/limits.h>
167171
#elif defined(_WIN32)
@@ -527,64 +531,89 @@ static bool common_download_model(
527531
return true;
528532
}
529533

530-
/**
531-
* Allow getting the HF file from the HF repo with tag (like ollama), for example:
532-
* - bartowski/Llama-3.2-3B-Instruct-GGUF:q4
533-
* - bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M
534-
* - bartowski/Llama-3.2-3B-Instruct-GGUF:q5_k_s
535-
* Tag is optional, default to "latest" (meaning it checks for Q4_K_M first, then Q4, then if not found, return the first GGUF file in repo)
536-
*
537-
* Return pair of <repo, file> (with "repo" already having tag removed)
538-
*
539-
* Note: we use the Ollama-compatible HF API, but not using the blobId. Instead, we use the special "ggufFile" field which returns the value for "hf_file". This is done to be backward-compatible with existing cache files.
540-
*/
541-
static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, const std::string & bearer_token) {
542-
auto parts = string_split<std::string>(hf_repo_with_tag, ':');
543-
std::string tag = parts.size() > 1 ? parts.back() : "latest";
544-
std::string hf_repo = parts[0];
545-
if (string_split<std::string>(hf_repo, '/').size() != 2) {
546-
throw std::invalid_argument("error: invalid HF repo format, expected <user>/<model>[:quant]\n");
547-
}
548-
549-
// fetch model info from Hugging Face Hub API
534+
std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params) {
550535
curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
551536
curl_slist_ptr http_headers;
552-
std::string res_str;
537+
std::vector<char> res_buffer;
553538

554-
std::string model_endpoint = get_model_endpoint();
555-
556-
std::string url = model_endpoint + "v2/" + hf_repo + "/manifests/" + tag;
557539
curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
558540
curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
541+
curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
559542
typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
560543
auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t {
561-
static_cast<std::string *>(data)->append((char * ) ptr, size * nmemb);
544+
auto data_vec = static_cast<std::vector<char> *>(data);
545+
data_vec->insert(data_vec->end(), (char *)ptr, (char *)ptr + size * nmemb);
562546
return size * nmemb;
563547
};
564548
curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback));
565-
curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &res_str);
549+
curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &res_buffer);
566550
#if defined(_WIN32)
567551
curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
568552
#endif
569-
if (!bearer_token.empty()) {
570-
std::string auth_header = "Authorization: Bearer " + bearer_token;
571-
http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str());
553+
if (params.timeout > 0) {
554+
curl_easy_setopt(curl.get(), CURLOPT_TIMEOUT, params.timeout);
555+
}
556+
if (params.max_size > 0) {
557+
curl_easy_setopt(curl.get(), CURLOPT_MAXFILESIZE, params.max_size);
572558
}
573-
// Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response
574559
http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
575-
http_headers.ptr = curl_slist_append(http_headers.ptr, "Accept: application/json");
560+
for (const auto & header : params.headers) {
561+
http_headers.ptr = curl_slist_append(http_headers.ptr, header.c_str());
562+
}
576563
curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
577564

578565
CURLcode res = curl_easy_perform(curl.get());
579566

580567
if (res != CURLE_OK) {
581-
throw std::runtime_error("error: cannot make GET request to HF API");
568+
std::string error_msg = curl_easy_strerror(res);
569+
throw std::runtime_error("error: cannot make GET request: " + error_msg);
582570
}
583571

584572
long res_code;
585-
std::string ggufFile = "";
586-
std::string mmprojFile = "";
587573
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &res_code);
574+
575+
return { res_code, std::move(res_buffer) };
576+
}
577+
578+
/**
579+
* Allow getting the HF file from the HF repo with tag (like ollama), for example:
580+
* - bartowski/Llama-3.2-3B-Instruct-GGUF:q4
581+
* - bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M
582+
* - bartowski/Llama-3.2-3B-Instruct-GGUF:q5_k_s
583+
* Tag is optional, default to "latest" (meaning it checks for Q4_K_M first, then Q4, then if not found, return the first GGUF file in repo)
584+
*
585+
* Return pair of <repo, file> (with "repo" already having tag removed)
586+
*
587+
* Note: we use the Ollama-compatible HF API, but not using the blobId. Instead, we use the special "ggufFile" field which returns the value for "hf_file". This is done to be backward-compatible with existing cache files.
588+
*/
589+
static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, const std::string & bearer_token) {
590+
auto parts = string_split<std::string>(hf_repo_with_tag, ':');
591+
std::string tag = parts.size() > 1 ? parts.back() : "latest";
592+
std::string hf_repo = parts[0];
593+
if (string_split<std::string>(hf_repo, '/').size() != 2) {
594+
throw std::invalid_argument("error: invalid HF repo format, expected <user>/<model>[:quant]\n");
595+
}
596+
597+
std::string url = get_model_endpoint() + "v2/" + hf_repo + "/manifests/" + tag;
598+
599+
// headers
600+
std::vector<std::string> headers;
601+
headers.push_back("Accept: application/json");
602+
if (!bearer_token.empty()) {
603+
headers.push_back("Authorization: Bearer " + bearer_token);
604+
}
605+
// Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response
606+
// User-Agent header is already set in common_remote_get_content, no need to set it here
607+
608+
// make the request
609+
common_remote_params params;
610+
params.headers = headers;
611+
auto res = common_remote_get_content(url, params);
612+
long res_code = res.first;
613+
std::string res_str(res.second.data(), res.second.size());
614+
std::string ggufFile;
615+
std::string mmprojFile;
616+
588617
if (res_code == 200) {
589618
// extract ggufFile.rfilename in json, using regex
590619
{
@@ -618,6 +647,10 @@ static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_
618647

619648
#else
620649

650+
bool common_has_curl() {
651+
return false;
652+
}
653+
621654
static bool common_download_file_single(const std::string &, const std::string &, const std::string &) {
622655
LOG_ERR("error: built without CURL, cannot download model from internet\n");
623656
return false;
@@ -640,6 +673,10 @@ static struct common_hf_file_res common_get_hf_file(const std::string &, const s
640673
return {};
641674
}
642675

676+
std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params) {
677+
throw std::runtime_error("error: built without CURL, cannot download model from the internet");
678+
}
679+
643680
#endif // LLAMA_USE_CURL
644681

645682
//

common/arg.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,3 +78,12 @@ bool common_params_parse(int argc, char ** argv, common_params & params, llama_e
7878

7979
// function to be used by test-arg-parser
8080
common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
81+
bool common_has_curl();
82+
83+
struct common_remote_params {
84+
std::vector<std::string> headers;
85+
long timeout = 0; // CURLOPT_TIMEOUT, in seconds ; 0 means no timeout
86+
long max_size = 0; // max size of the response ; unlimited if 0 ; max is 2GB
87+
};
88+
// get remote file content, returns <http_code, raw_response_body>
89+
std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params);

common/json-schema-to-grammar.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ using json = nlohmann::ordered_json;
1616
static std::string build_repetition(const std::string & item_rule, int min_items, int max_items, const std::string & separator_rule = "") {
1717
auto has_max = max_items != std::numeric_limits<int>::max();
1818

19+
if (max_items == 0) {
20+
return "";
21+
}
1922
if (min_items == 0 && max_items == 1) {
2023
return item_rule + "?";
2124
}

examples/json_schema_to_grammar.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@
1010

1111
def _build_repetition(item_rule, min_items, max_items, separator_rule=None):
1212

13+
if max_items == 0:
14+
return ""
15+
1316
if min_items == 0 and max_items == 1:
1417
return f'{item_rule}?'
1518

examples/llava/clip-impl.h

Lines changed: 10 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -17,22 +17,15 @@
1717
#define KEY_FTYPE "general.file_type"
1818
#define KEY_NAME "general.name"
1919
#define KEY_DESCRIPTION "general.description"
20-
#define KEY_HAS_TEXT_ENC "clip.has_text_encoder"
21-
#define KEY_HAS_VIS_ENC "clip.has_vision_encoder"
22-
#define KEY_HAS_LLAVA_PROJ "clip.has_llava_projector"
23-
#define KEY_HAS_MINICPMV_PROJ "clip.has_minicpmv_projector"
24-
#define KEY_HAS_GLM_PROJ "clip.has_glm_projector"
2520
#define KEY_MINICPMV_VERSION "clip.minicpmv_version"
26-
#define KEY_HAS_QWEN2VL_MERGER "clip.has_qwen2vl_merger"
2721
#define KEY_USE_GELU "clip.use_gelu"
2822
#define KEY_USE_SILU "clip.use_silu"
29-
#define KEY_N_EMBD "clip.%s.embedding_length"
30-
#define KEY_N_FF "clip.%s.feed_forward_length"
31-
#define KEY_N_BLOCK "clip.%s.block_count"
32-
#define KEY_N_HEAD "clip.%s.attention.head_count"
33-
#define KEY_LAYER_NORM_EPS "clip.%s.attention.layer_norm_epsilon"
34-
#define KEY_PROJ_DIM "clip.%s.projection_dim"
35-
#define KEY_TOKENS "tokenizer.ggml.tokens"
23+
#define KEY_N_EMBD "clip.vision.embedding_length"
24+
#define KEY_N_FF "clip.vision.feed_forward_length"
25+
#define KEY_N_BLOCK "clip.vision.block_count"
26+
#define KEY_N_HEAD "clip.vision.attention.head_count"
27+
#define KEY_LAYER_NORM_EPS "clip.vision.attention.layer_norm_epsilon"
28+
#define KEY_PROJ_DIM "clip.vision.projection_dim"
3629
#define KEY_IMAGE_SIZE "clip.vision.image_size"
3730
#define KEY_PATCH_SIZE "clip.vision.patch_size"
3831
#define KEY_IMAGE_MEAN "clip.vision.image_mean"
@@ -96,9 +89,9 @@ enum projector_type {
9689
PROJECTOR_TYPE_MLP_NORM,
9790
PROJECTOR_TYPE_LDP,
9891
PROJECTOR_TYPE_LDPV2,
99-
PROJECTOR_TYPE_RESAMPLER,
92+
PROJECTOR_TYPE_MINICPMV,
10093
PROJECTOR_TYPE_GLM_EDGE,
101-
PROJECTOR_TYPE_MERGER,
94+
PROJECTOR_TYPE_QWEN2VL,
10295
PROJECTOR_TYPE_GEMMA3,
10396
PROJECTOR_TYPE_IDEFICS3,
10497
PROJECTOR_TYPE_PIXTRAL,
@@ -109,9 +102,9 @@ static std::map<projector_type, std::string> PROJECTOR_TYPE_NAMES = {
109102
{ PROJECTOR_TYPE_MLP, "mlp" },
110103
{ PROJECTOR_TYPE_LDP, "ldp" },
111104
{ PROJECTOR_TYPE_LDPV2, "ldpv2"},
112-
{ PROJECTOR_TYPE_RESAMPLER, "resampler"},
105+
{ PROJECTOR_TYPE_MINICPMV, "resampler"},
113106
{ PROJECTOR_TYPE_GLM_EDGE, "adapter"},
114-
{ PROJECTOR_TYPE_MERGER, "qwen2vl_merger"},
107+
{ PROJECTOR_TYPE_QWEN2VL, "qwen2vl_merger"},
115108
{ PROJECTOR_TYPE_GEMMA3, "gemma3"},
116109
{ PROJECTOR_TYPE_IDEFICS3, "idefics3"},
117110
{ PROJECTOR_TYPE_PIXTRAL, "pixtral"},

0 commit comments

Comments
 (0)