Commit 267c139

common : refactor downloading system, handle mmproj with -hf option (#12694)
* (wip) refactor downloading system [no ci]
* fix all examples
* fix mmproj with -hf
* gemma3: update readme
* only handle mmproj in llava example
* fix multi-shard download
* windows: fix problem with std::min and std::max
* fix 2

1 parent f423981 commit 267c139

File tree

19 files changed: +673 −635 lines changed


common/arg.cpp

Lines changed: 604 additions & 71 deletions
Large diffs are not rendered by default.
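This file absorbs the bulk of the refactor: per the commit message, the model-download logic that used to live in `common/common.cpp` (HF repo resolution, URL downloads, multi-shard handling) now sits next to argument parsing, which is what lets `-hf` populate both the model and `mmproj` fields. The "windows: fix problem with std::min and std::max" item is most likely the familiar `<windows.h>` min/max macro clash, commonly worked around with `NOMINMAX` or by parenthesizing the calls as `(std::min)(a, b)`. Since the diff is not rendered, here is a rough, hypothetical sketch of what "multi-shard download" has to deal with, assuming the usual llama.cpp split-GGUF naming convention (`<prefix>-%05d-of-%05d.gguf`); none of this is the commit's actual code:

```cpp
#include <cstdio>
#include <string>
#include <vector>

// Hypothetical illustration: expand the prefix of a split GGUF model into
// the full list of shard filenames that must all be downloaded, assuming
// the usual llama.cpp split naming: model-00001-of-00003.gguf, ...
static std::vector<std::string> expand_shards(const std::string & prefix, int n_split) {
    std::vector<std::string> paths;
    for (int i = 1; i <= n_split; i++) {
        char buf[512];
        snprintf(buf, sizeof(buf), "%s-%05d-of-%05d.gguf", prefix.c_str(), i, n_split);
        paths.push_back(buf);
    }
    return paths;
}

int main() {
    for (const auto & p : expand_shards("gemma-3-27b-it-Q4_K_M", 3)) {
        printf("%s\n", p.c_str()); // gemma-3-27b-it-Q4_K_M-00001-of-00003.gguf, ...
    }
    return 0;
}
```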

common/common.cpp

Lines changed: 4 additions & 491 deletions
Large diffs are not rendered by default.

common/common.h

Lines changed: 12 additions & 32 deletions

```diff
@@ -184,6 +184,13 @@ struct common_params_sampling {
     std::string print() const;
 };
 
+struct common_params_model {
+    std::string path    = ""; // model local path       // NOLINT
+    std::string url     = ""; // model url to download  // NOLINT
+    std::string hf_repo = ""; // HF repo                // NOLINT
+    std::string hf_file = ""; // HF file                // NOLINT
+};
+
 struct common_params_speculative {
     std::vector<ggml_backend_dev_t> devices; // devices to use for offloading
 
@@ -197,19 +204,11 @@ struct common_params_speculative {
     struct cpu_params cpuparams;
     struct cpu_params cpuparams_batch;
 
-    std::string hf_repo = ""; // HF repo // NOLINT
-    std::string hf_file = ""; // HF file // NOLINT
-
-    std::string model     = ""; // draft model for speculative decoding // NOLINT
-    std::string model_url = ""; // model url to download // NOLINT
+    struct common_params_model model;
 };
 
 struct common_params_vocoder {
-    std::string hf_repo = ""; // HF repo // NOLINT
-    std::string hf_file = ""; // HF file // NOLINT
-
-    std::string model     = ""; // model path // NOLINT
-    std::string model_url = ""; // model url to download // NOLINT
+    struct common_params_model model;
 
     std::string speaker_file = ""; // speaker file path // NOLINT
 
@@ -267,12 +266,10 @@ struct common_params {
     struct common_params_speculative speculative;
     struct common_params_vocoder vocoder;
 
-    std::string model = ""; // model path // NOLINT
+    struct common_params_model model;
+
     std::string model_alias = ""; // model alias // NOLINT
-    std::string model_url = ""; // model url to download // NOLINT
     std::string hf_token = ""; // HF token // NOLINT
-    std::string hf_repo = ""; // HF repo // NOLINT
-    std::string hf_file = ""; // HF file // NOLINT
     std::string prompt = ""; // NOLINT
     std::string system_prompt = ""; // NOLINT
     std::string prompt_file = ""; // store the external prompt file name // NOLINT
@@ -347,7 +344,7 @@ struct common_params {
     common_conversation_mode conversation_mode = COMMON_CONVERSATION_MODE_AUTO;
 
     // multimodal models (see examples/llava)
-    std::string mmproj = ""; // path to multimodal projector // NOLINT
+    struct common_params_model mmproj;
     std::vector<std::string> image; // path to image file(s)
 
     // embedding
@@ -546,23 +543,6 @@ struct llama_model_params common_model_params_to_llama ( common_params
 struct llama_context_params common_context_params_to_llama(const common_params & params);
 struct ggml_threadpool_params ggml_threadpool_params_from_cpu_params(const cpu_params & params);
 
-struct llama_model * common_load_model_from_url(
-    const std::string & model_url,
-    const std::string & local_path,
-    const std::string & hf_token,
-    const struct llama_model_params & params);
-
-struct llama_model * common_load_model_from_hf(
-    const std::string & repo,
-    const std::string & remote_path,
-    const std::string & local_path,
-    const std::string & hf_token,
-    const struct llama_model_params & params);
-
-std::pair<std::string, std::string> common_get_hf_file(
-    const std::string & hf_repo_with_tag,
-    const std::string & hf_token);
-
 // clear LoRA adapters from context, then apply new list of adapters
 void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora);
```
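This is the heart of the API change: the top-level params, speculative decoding, the vocoder, and now `mmproj` all share one `common_params_model` instead of carrying their own `model`/`model_url`/`hf_repo`/`hf_file` strings, and the standalone `common_load_model_from_url` / `common_load_model_from_hf` / `common_get_hf_file` declarations leave the public header. A minimal sketch of the new shape and the call-site migration; the `describe_source` helper is hypothetical, for illustration only:

```cpp
#include <cstdio>
#include <string>

// Mirrors the struct this commit introduces in common/common.h.
struct common_params_model {
    std::string path;    // model local path
    std::string url;     // model url to download
    std::string hf_repo; // HF repo
    std::string hf_file; // HF file
};

// Hypothetical helper (not part of the commit): report where a model
// would be sourced from, given the grouped fields.
static std::string describe_source(const common_params_model & m) {
    if (!m.path.empty())    return "local path: " + m.path;
    if (!m.hf_repo.empty()) return "HF: " + m.hf_repo + "/" + m.hf_file;
    if (!m.url.empty())     return "URL: " + m.url;
    return "(unset)";
}

int main() {
    common_params_model model;
    model.hf_repo = "ggml-org/gemma-3-4b-it-GGUF";

    // Call sites migrate from params.model to params.model.path, e.g.:
    //   llama_model_load_from_file(params.model.path.c_str(), model_params);
    printf("%s\n", describe_source(model).c_str());
    return 0;
}
```

The design win is that every parameter that names a model (draft model, vocoder, projector) automatically gains the URL and HF download handling now centralized in `common/arg.cpp`.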

examples/batched-bench/batched-bench.cpp

Lines changed: 1 addition & 1 deletion

```diff
@@ -38,7 +38,7 @@ int main(int argc, char ** argv) {
 
     llama_model_params model_params = common_model_params_to_llama(params);
 
-    llama_model * model = llama_model_load_from_file(params.model.c_str(), model_params);
+    llama_model * model = llama_model_load_from_file(params.model.path.c_str(), model_params);
 
     if (model == NULL) {
         fprintf(stderr , "%s: error: unable to load model\n" , __func__);
```

examples/batched/batched.cpp

Lines changed: 1 addition & 1 deletion

```diff
@@ -41,7 +41,7 @@ int main(int argc, char ** argv) {
 
     llama_model_params model_params = common_model_params_to_llama(params);
 
-    llama_model * model = llama_model_load_from_file(params.model.c_str(), model_params);
+    llama_model * model = llama_model_load_from_file(params.model.path.c_str(), model_params);
 
     if (model == NULL) {
         LOG_ERR("%s: error: unable to load model\n" , __func__);
```

examples/export-lora/export-lora.cpp

Lines changed: 1 addition & 1 deletion

```diff
@@ -421,7 +421,7 @@ int main(int argc, char ** argv) {
 
     g_verbose = (params.verbosity > 1);
     try {
-        lora_merge_ctx ctx(params.model, params.lora_adapters, params.out_file, params.cpuparams.n_threads);
+        lora_merge_ctx ctx(params.model.path, params.lora_adapters, params.out_file, params.cpuparams.n_threads);
         ctx.run_merge();
     } catch (const std::exception & err) {
         fprintf(stderr, "%s\n", err.what());
```

examples/gritlm/gritlm.cpp

Lines changed: 1 addition & 1 deletion

```diff
@@ -168,7 +168,7 @@ int main(int argc, char * argv[]) {
 
     llama_backend_init();
 
-    llama_model * model = llama_model_load_from_file(params.model.c_str(), mparams);
+    llama_model * model = llama_model_load_from_file(params.model.path.c_str(), mparams);
 
     // create generation context
     llama_context * ctx = llama_init_from_model(model, cparams);
```

examples/llava/README-gemma3.md

Lines changed: 20 additions & 0 deletions

````diff
@@ -4,6 +4,26 @@
 >
 > This is very experimental, only used for demo purposes.
 
+## Quick start
+
+You can use the pre-quantized models from the [ggml-org](https://huggingface.co/ggml-org) Hugging Face account
+
+```bash
+# build
+cmake -B build
+cmake --build build --target llama-gemma3-cli
+
+# alternatively, install from brew (MacOS)
+brew install llama.cpp
+
+# run it
+llama-gemma3-cli -hf ggml-org/gemma-3-4b-it-GGUF
+llama-gemma3-cli -hf ggml-org/gemma-3-12b-it-GGUF
+llama-gemma3-cli -hf ggml-org/gemma-3-27b-it-GGUF
+
+# note: the 1B model does not support vision
+```
+
 ## How to get mmproj.gguf?
 
 ```bash
````
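Going by the commit title ("handle mmproj with -hf"), the single `-hf` flag in the quick start above is expected to resolve both the model and its matching `mmproj` file from the same repository, so no separate `--mmproj` argument should be needed for these pre-quantized repos.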

examples/llava/gemma3-cli.cpp

Lines changed: 3 additions & 3 deletions

```diff
@@ -78,7 +78,7 @@ struct gemma3_context {
     }
 
     void init_clip_model(common_params & params) {
-        const char * clip_path = params.mmproj.c_str();
+        const char * clip_path = params.mmproj.path.c_str();
         ctx_clip = clip_model_load(clip_path, params.verbosity > 1);
     }
 
@@ -232,13 +232,13 @@ int main(int argc, char ** argv) {
 
     common_init();
 
-    if (params.mmproj.empty()) {
+    if (params.mmproj.path.empty()) {
         show_additional_info(argc, argv);
         return 1;
     }
 
     gemma3_context ctx(params);
-    printf("%s: %s\n", __func__, params.model.c_str());
+    printf("%s: %s\n", __func__, params.model.path.c_str());
 
     bool is_single_turn = !params.prompt.empty() && !params.image.empty();
```
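Both hunks show the same migration (`params.mmproj` is now a struct, so the string lives at `params.mmproj.path`), and the early-exit check is the gating pattern any vision CLI needs: without a projector there is nothing to encode images with. A self-contained sketch of that check, with trimmed hypothetical types standing in for the real `common_params`; it is an illustration, not the commit's code:

```cpp
#include <cstdio>
#include <string>

// Trimmed stand-ins for the real structs in common/common.h.
struct common_params_model { std::string path, url, hf_repo, hf_file; };
struct params_t {
    common_params_model model;
    common_params_model mmproj; // the projector is now downloadable like the model
};

// Gating pattern from gemma3-cli: bail out early when no mmproj path was
// resolved (neither given locally nor filled in by -hf handling).
static bool validate_multimodal(const params_t & params) {
    if (params.mmproj.path.empty()) {
        fprintf(stderr, "error: no mmproj file, pass --mmproj or use -hf\n");
        return false;
    }
    return true;
}

int main() {
    params_t params;
    params.model.path  = "gemma-3-4b-it.gguf";      // hypothetical local files
    params.mmproj.path = "mmproj-gemma-3-4b.gguf";
    return validate_multimodal(params) ? 0 : 1;
}
```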

examples/llava/llava-cli.cpp

Lines changed: 3 additions & 3 deletions

```diff
@@ -225,7 +225,7 @@ static struct llama_model * llava_init(common_params * params) {
 
     llama_model_params model_params = common_model_params_to_llama(*params);
 
-    llama_model * model = llama_model_load_from_file(params->model.c_str(), model_params);
+    llama_model * model = llama_model_load_from_file(params->model.path.c_str(), model_params);
     if (model == NULL) {
         LOG_ERR("%s: unable to load model\n" , __func__);
         return NULL;
@@ -234,7 +234,7 @@ static struct llama_model * llava_init(common_params * params) {
 }
 
 static struct llava_context * llava_init_context(common_params * params, llama_model * model) {
-    const char * clip_path = params->mmproj.c_str();
+    const char * clip_path = params->mmproj.path.c_str();
 
     auto prompt = params->prompt;
     if (prompt.empty()) {
@@ -283,7 +283,7 @@ int main(int argc, char ** argv) {
 
     common_init();
 
-    if (params.mmproj.empty() || (params.image.empty() && !prompt_contains_image(params.prompt))) {
+    if (params.mmproj.path.empty() || (params.image.empty() && !prompt_contains_image(params.prompt))) {
         print_usage(argc, argv);
         return 1;
     }
```
