Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions common/arg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2634,6 +2634,31 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
params.hf_token = value;
}
).set_env("HF_TOKEN"));
// ModelScope repository option. The handler stores the "<user>/<model>[:quant]"
// value in params.model.hf_repo (the field is named for Hugging Face but is
// used here for the ModelScope repo as well) and tags the model with
// LLAMA_REPO_TYPE_MS.
add_opt(common_arg(
{"-ms", "-msr", "--ms-repo"}, "<user>/<model>[:quant]",
"ModelScope model repository; quant is optional, case-insensitive, default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.\n"
"mmproj is also downloaded automatically if available. to disable, add --no-mmproj\n"
"example: user/model:Q4_K_M\n"
"(default: unused)",
[](common_params & params, const std::string & value) {
params.model.hf_repo = value;
params.model.repo_type = LLAMA_REPO_TYPE_MS;
}
).set_env("LLAMA_ARG_MS_REPO"));
// ModelScope file option. The handler only stores the file name in
// params.model.hf_file; it does not touch params.model.repo_type.
add_opt(common_arg(
{"-msf", "--ms-file"}, "FILE",
"ModelScope model file. If specified, it will override the quant in --ms-repo (default: unused)",
[](common_params & params, const std::string & value) {
params.model.hf_file = value;
}
).set_env("LLAMA_ARG_MS_FILE"));
// ModelScope token option. The handler writes params.hf_token, i.e. the same
// field that the Hugging Face token handler directly above this group writes.
// NOTE(review): since both token options share one field, a token supplied for
// one hub (e.g. via HF_TOKEN) could presumably end up being sent to the other
// hub, depending on which option is applied last -- confirm this is intended,
// or give ModelScope its own token field.
add_opt(common_arg(
{"-mst", "--ms-token"}, "TOKEN",
"ModelScope access token (default: value from MS_TOKEN environment variable)",
[](common_params & params, const std::string & value) {
params.hf_token = value;
}
).set_env("MS_TOKEN"));
add_opt(common_arg(
{"--context-file"}, "FNAME",
"file to load context from (use comma-separated values to specify multiple files)",
Expand Down
25 changes: 18 additions & 7 deletions common/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1381,18 +1381,29 @@ common_init_result_ptr common_init_from_params(common_params & params) {

common_init_result::~common_init_result() = default;

std::string get_model_endpoint() {
std::string get_model_endpoint(llama_repo_type type) {
const char * model_endpoint_env = getenv("MODEL_ENDPOINT");
// We still respect the use of environment-variable "HF_ENDPOINT" for backward-compatibility.
const char * hf_endpoint_env = getenv("HF_ENDPOINT");
const char * endpoint_env = model_endpoint_env ? model_endpoint_env : hf_endpoint_env;
std::string model_endpoint = "https://huggingface.co/";
if (endpoint_env) {
model_endpoint = endpoint_env;
if (model_endpoint.back() != '/') {
model_endpoint += '/';
std::string model_endpoint;

if (type == LLAMA_REPO_TYPE_MS) {
if (model_endpoint_env && *model_endpoint_env) {
model_endpoint = model_endpoint_env;
} else {
model_endpoint = "https://www.modelscope.cn/";
}
} else {
const char * endpoint_env = model_endpoint_env ? model_endpoint_env : hf_endpoint_env;
if (endpoint_env && *endpoint_env) {
model_endpoint = endpoint_env;
} else {
model_endpoint = "https://huggingface.co/";
}
}
if (model_endpoint.back() != '/') {
model_endpoint += '/';
}
return model_endpoint;
}

Expand Down
10 changes: 9 additions & 1 deletion common/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@
#include <vector>
#include <map>

// Identifies which model hub a repository reference points at.
// The enumerator values are written out explicitly; they are the same values
// the enumerators would receive implicitly (first = 0, second = 1).
enum llama_repo_type {
    LLAMA_REPO_TYPE_HF = 0, // Hugging Face
    LLAMA_REPO_TYPE_MS = 1, // ModelScope
};

#if defined(_WIN32) && !defined(_WIN32_WINNT)
#define _WIN32_WINNT 0x0A00
#endif
Expand Down Expand Up @@ -305,6 +311,7 @@ struct common_params_model {
std::string hf_file = ""; // HF file // NOLINT
std::string docker_repo = ""; // Docker repo // NOLINT
std::string name = ""; // in format <user>/<model>[:<tag>] (tag is optional) // NOLINT
enum llama_repo_type repo_type = LLAMA_REPO_TYPE_HF; // repository type for model downloads // NOLINT
};

struct common_ngram_mod;
Expand Down Expand Up @@ -467,6 +474,7 @@ struct common_params {
std::set<std::string> model_alias; // model aliases // NOLINT
std::set<std::string> model_tags; // model tags (informational, not used for routing) // NOLINT
std::string hf_token = ""; // HF token // NOLINT
llama_repo_type repo_type = LLAMA_REPO_TYPE_HF; // repository type (HF or ModelScope) // NOLINT
std::string prompt = ""; // NOLINT
std::string system_prompt = ""; // NOLINT
std::string prompt_file = ""; // store the external prompt file name // NOLINT
Expand Down Expand Up @@ -859,7 +867,7 @@ struct ggml_threadpool_params ggml_threadpool_params_from_cpu_params(const cpu_p
// clear LoRA adapters from context, then apply new list of adapters
void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora);

std::string get_model_endpoint();
std::string get_model_endpoint(llama_repo_type type = LLAMA_REPO_TYPE_HF);

//
// Batch utils
Expand Down
2 changes: 1 addition & 1 deletion common/download.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -683,7 +683,7 @@ static hf_plan get_hf_plan(const common_params_model & model,
auto [repo, tag] = common_download_split_repo_tag(model.hf_repo);

if (!opts.offline) {
all = hf_cache::get_repo_files(repo, opts.bearer_token);
all = hf_cache::get_repo_files(repo, opts.bearer_token, model.repo_type);
}
if (all.empty()) {
all = hf_cache::get_cached_files(repo);
Expand Down
180 changes: 126 additions & 54 deletions common/hf-cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,18 +196,28 @@ static void safe_write_file(const fs::path & path, const std::string & data) {
}

static nl::json api_get(const std::string & url,
const std::string & token) {
const std::string & token,
llama_repo_type type = LLAMA_REPO_TYPE_HF) {
auto [cli, parts] = common_http_client(url);

// Unified User-Agent for consistency
httplib::Headers headers = {
{"User-Agent", "llama-cpp/" + build_info},
{"Accept", "application/json"}
};

if (is_valid_hf_token(token)) {
headers.emplace("Authorization", "Bearer " + token);
} else if (!token.empty()) {
LOG_WRN("%s: invalid token, authentication disabled\n", __func__);
if (!token.empty()) {
if (type == LLAMA_REPO_TYPE_MS) {
// ModelScope: Cookie Auth
headers.emplace("Cookie", "m_session_id=" + token);
} else {
// Hugging Face: Bearer Auth
if (is_valid_hf_token(token)) {
headers.emplace("Authorization", "Bearer " + token);
} else {
LOG_WRN("%s: invalid HF token, authentication disabled\n", __func__);
}
}
}

if (auto res = cli.Get(parts.path, headers)) {
Expand All @@ -227,11 +237,18 @@ static nl::json api_get(const std::string & url,
}

static std::string get_repo_commit(const std::string & repo_id,
const std::string & token) {
try {
auto endpoint = get_model_endpoint();
auto json = api_get(endpoint + "api/models/" + repo_id + "/refs", token);
const std::string & token,
llama_repo_type type = LLAMA_REPO_TYPE_HF) {

// MS does not support /refs API, default to master
if (type == LLAMA_REPO_TYPE_MS) {
return "master";
}

// Original Hugging Face logic
try {
std::string endpoint = get_model_endpoint(type);
auto json = api_get(endpoint + "api/models/" + repo_id + "/refs", token, type);
if (!json.is_object() ||
!json.contains("branches") || !json["branches"].is_array()) {
LOG_WRN("%s: missing 'branches' for '%s'\n", __func__, repo_id.c_str());
Expand Down Expand Up @@ -289,13 +306,14 @@ static std::string get_repo_commit(const std::string & repo_id,
}

hf_files get_repo_files(const std::string & repo_id,
const std::string & token) {
const std::string & token,
llama_repo_type type) {
if (!is_valid_repo_id(repo_id)) {
LOG_WRN("%s: invalid repository: %s\n", __func__, repo_id.c_str());
return {};
}

std::string commit = get_repo_commit(repo_id, token);
std::string commit = get_repo_commit(repo_id, token, type);
if (commit.empty()) {
LOG_WRN("%s: failed to resolve commit for %s\n", __func__, repo_id.c_str());
return {};
Expand All @@ -307,63 +325,110 @@ hf_files get_repo_files(const std::string & repo_id,
hf_files files;

try {
auto endpoint = get_model_endpoint();
auto json = api_get(endpoint + "api/models/" + repo_id + "/tree/" + commit + "?recursive=true", token);
std::string endpoint = get_model_endpoint(type);
nl::json json;

if (!json.is_array()) {
LOG_WRN("%s: response is not an array for '%s'\n", __func__, repo_id.c_str());
return {};
}
if (type == LLAMA_REPO_TYPE_MS) {
// --- ModelScope Logic ---
std::string url = endpoint + "api/v1/models/" + repo_id + "/repo/files?Revision=" + commit + "&Recursive=True";
json = api_get(url, token, type);

for (const auto & item : json) {
if (!item.is_object() ||
!item.contains("type") || !item["type"].is_string() || item["type"] != "file" ||
!item.contains("path") || !item["path"].is_string()) {
continue;
if (json.contains("Code") && json["Code"] != 200) {
throw std::runtime_error("ModelScope API Error: " + json.value("Message", "Unknown"));
}

hf_file file;
file.repo_id = repo_id;
file.path = item["path"].get<std::string>();

if (!is_valid_subpath(commit_path, file.path)) {
LOG_WRN("%s: skip invalid path: %s\n", __func__, file.path.c_str());
continue;
if (!json.contains("Data") || !json["Data"].contains("Files") || !json["Data"]["Files"].is_array()) {
return {};
}

if (item.contains("lfs") && item["lfs"].is_object()) {
if (item["lfs"].contains("oid") && item["lfs"]["oid"].is_string()) {
file.oid = item["lfs"]["oid"].get<std::string>();
for (const auto & item : json["Data"]["Files"]) {
if (!item.contains("Path") || !item["Path"].is_string()) continue;

hf_file file;
file.repo_id = repo_id;
file.path = item["Path"].get<std::string>();

if (!is_valid_subpath(commit_path, file.path)) {
LOG_WRN("%s: skip invalid path: %s\n", __func__, file.path.c_str());
continue;
}

if (item.contains("Size") && item["Size"].is_number_unsigned()) {
file.size = item["Size"].get<size_t>();
}
if (item["lfs"].contains("size") && item["lfs"]["size"].is_number()) {
file.size = item["lfs"]["size"].get<size_t>();
if (item.contains("Sha256") && item["Sha256"].is_string()) {
file.oid = item["Sha256"].get<std::string>();
} else if (item.contains("Revision") && item["Revision"].is_string()) {
file.oid = item["Revision"].get<std::string>();
}
} else if (item.contains("oid") && item["oid"].is_string()) {
file.oid = item["oid"].get<std::string>();
}
if (file.size == 0 && item.contains("size") && item["size"].is_number()) {
file.size = item["size"].get<size_t>();

// MS Download URL: /models/{repo}/resolve/{commit}/{path}
file.url = endpoint + "models/" + repo_id + "/resolve/" + commit + "/" + file.path;
file.final_path = (commit_path / file.path).string();
file.local_path = file.oid.empty() ? file.final_path : (blobs_path / file.oid).string();

files.push_back(std::move(file));
}

if (!file.oid.empty() && !is_valid_oid(file.oid)) {
LOG_WRN("%s: skip invalid oid: %s\n", __func__, file.oid.c_str());
continue;
} else {
// Original Hugging Face Logic
auto json = api_get(endpoint + "api/models/" + repo_id + "/tree/" + commit + "?recursive=true", token, type);

if (!json.is_array()) {
LOG_WRN("%s: response is not an array for '%s'\n", __func__, repo_id.c_str());
return {};
}

file.url = endpoint + repo_id + "/resolve/" + commit + "/" + file.path;
for (const auto & item : json) {
if (!item.is_object() ||
!item.contains("type") || !item["type"].is_string() || item["type"] != "file" ||
!item.contains("path") || !item["path"].is_string()) {
continue;
}

fs::path final_path = commit_path / file.path;
file.final_path = final_path.string();
hf_file file;
file.repo_id = repo_id;
file.path = item["path"].get<std::string>();

if (!file.oid.empty() && !fs::exists(final_path)) {
fs::path local_path = blobs_path / file.oid;
file.local_path = local_path.string();
} else {
file.local_path = file.final_path;
}
if (!is_valid_subpath(commit_path, file.path)) {
LOG_WRN("%s: skip invalid path: %s\n", __func__, file.path.c_str());
continue;
}

files.push_back(file);
if (item.contains("lfs") && item["lfs"].is_object()) {
if (item["lfs"].contains("oid") && item["lfs"]["oid"].is_string()) {
file.oid = item["lfs"]["oid"].get<std::string>();
}
if (item["lfs"].contains("size") && item["lfs"]["size"].is_number()) {
file.size = item["lfs"]["size"].get<size_t>();
}
} else if (item.contains("oid") && item["oid"].is_string()) {
file.oid = item["oid"].get<std::string>();
}
if (file.size == 0 && item.contains("size") && item["size"].is_number()) {
file.size = item["size"].get<size_t>();
}

if (!file.oid.empty() && !is_valid_oid(file.oid)) {
LOG_WRN("%s: skip invalid oid: %s\n", __func__, file.oid.c_str());
continue;
}

file.url = endpoint + repo_id + "/resolve/" + commit + "/" + file.path;

fs::path final_path = commit_path / file.path;
file.final_path = final_path.string();

if (!file.oid.empty() && !fs::exists(final_path)) {
fs::path local_path = blobs_path / file.oid;
file.local_path = local_path.string();
} else {
file.local_path = file.final_path;
}

files.push_back(file);
}
}

} catch (const nl::json::exception & e) {
LOG_ERR("%s: JSON error: %s\n", __func__, e.what());
} catch (const std::exception & e) {
Expand All @@ -372,6 +437,13 @@ hf_files get_repo_files(const std::string & repo_id,
return files;
}


// No separate two-argument get_repo_files(repo_id, token) overload is defined
// here on purpose. The declaration in hf-cache.h already defaults `type` to
// LLAMA_REPO_TYPE_HF, so existing two-argument callers resolve to the
// three-argument function. Adding a (repo_id, token) overload next to it would
// make every two-argument call ambiguous (default arguments do not take part
// in ranking overloads), and the overload itself could never be selected,
// leaving an unused static function (-Wunused-function).

static std::string get_cached_ref(const fs::path & repo_path) {
fs::path refs_path = repo_path / "refs";
if (!fs::is_directory(refs_path)) {
Expand Down Expand Up @@ -720,7 +792,7 @@ void migrate_old_cache_to_hf_cache(const std::string & token, bool offline) {
}

auto repo_id = owner + "/" + repo;
auto files = get_repo_files(repo_id, token);
auto files = get_repo_files(repo_id, token, LLAMA_REPO_TYPE_HF);

if (files.empty()) {
LOG_WRN("%s: could not get repo files for %s, skipping\n", __func__, repo_id.c_str());
Expand Down
5 changes: 4 additions & 1 deletion common/hf-cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

// Ref: https://huggingface.co/docs/hub/local-cache.md

#include "common.h" // for llama_repo_type

namespace hf_cache {

struct hf_file {
Expand All @@ -22,7 +24,8 @@ using hf_files = std::vector<hf_file>;
// Get files from HF API
hf_files get_repo_files(
const std::string & repo_id,
const std::string & token
const std::string & token,
llama_repo_type type = LLAMA_REPO_TYPE_HF
);

hf_files get_cached_files(const std::string & repo_id = {});
Expand Down
3 changes: 3 additions & 0 deletions tools/cli/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@
| `-hfv, -hfrv, --hf-repo-v <user>/<model>[:quant]` | Hugging Face model repository for the vocoder model (default: unused)<br/>(env: LLAMA_ARG_HF_REPO_V) |
| `-hffv, --hf-file-v FILE` | Hugging Face model file for the vocoder model (default: unused)<br/>(env: LLAMA_ARG_HF_FILE_V) |
| `-hft, --hf-token TOKEN` | Hugging Face access token (default: value from HF_TOKEN environment variable)<br/>(env: HF_TOKEN) |
| `-ms, -msr, --ms-repo <user>/<model>[:quant]` | ModelScope model repository; quant is optional and case-insensitive, defaults to Q4_K_M, and falls back to the first file in the repo if Q4_K_M doesn't exist.<br/>mmproj is also downloaded automatically if available; to disable, add --no-mmproj<br/>example: Qwen/Qwen3-4B-GGUF:Q4_K_M<br/>(default: unused)<br/>(env: LLAMA_ARG_MS_REPO) |
| `-msf, --ms-file FILE` | ModelScope model file. If specified, it will override the quant in --ms-repo (default: unused)<br/>(env: LLAMA_ARG_MS_FILE) |
| `-mst, --ms-token TOKEN` | ModelScope access token (default: value from MS_TOKEN environment variable)<br/>(env: MS_TOKEN) |
| `--log-disable` | Log disable |
| `--log-file FNAME` | Log to file<br/>(env: LLAMA_LOG_FILE) |
| `--log-colors [on\|off\|auto]` | Set colored logging ('on', 'off', or 'auto', default: 'auto')<br/>'auto' enables colors when output is to a terminal<br/>(env: LLAMA_LOG_COLORS) |
Expand Down
Loading