14
14
namespace hu = huggingface_utils;
15
15
16
16
namespace {
17
// Freshness window, in seconds, for an entry in the model source cache.
// The cache lookup compares this against an elapsed time expressed as a
// std::chrono::seconds count, so it stays a plain integer.
constexpr int kModeSourceCacheSecs = 600;

// Builds the identifier used to key a repository in the model source cache and
// to name it in error messages.
//
// @param hub_author  Hub author (or organization) that owns the repository.
// @param model_name  Repository / model name under that author.
// @return `hub_author`, the separator literal, then `model_name`.
//
// NOTE(review): the separator literal is reproduced exactly as it appears in
// this view; its leading space may be an extraction artifact - confirm
// against the repository before relying on the exact key format.
std::string GenSourceId(const std::string& hub_author,
                        const std::string& model_name) {
  return hub_author + " /" + model_name;
}
23
+
17
24
std::vector<ModelInfo> ParseJsonString (const std::string& json_str) {
18
25
std::vector<ModelInfo> models;
19
26
@@ -79,19 +86,34 @@ cpp::result<bool, std::string> ModelSourceService::AddModelSource(
79
86
}
80
87
81
88
if (auto is_org = r.pathParams .size () == 1 ; is_org) {
82
- auto & author = r.pathParams [0 ];
83
- if (author == " cortexso" ) {
84
- return AddCortexsoOrg (model_source);
85
- } else {
86
- return AddHfOrg (model_source, author);
87
- }
89
+ return cpp::fail (" Only support repository model source, url: " +
90
+ model_source);
91
+ // TODO(sang): restore organization-level model sources; the previous handling is kept below, commented out.
92
+ // auto& hub_author = r.pathParams[0];
93
+ // if (hub_author == "cortexso") {
94
+ // return AddCortexsoOrg(model_source);
95
+ // } else {
96
+ // return AddHfOrg(model_source, hub_author);
97
+ // }
88
98
} else { // Repo
89
- auto const & author = r.pathParams [0 ];
99
+ auto const & hub_author = r.pathParams [0 ];
90
100
auto const & model_name = r.pathParams [1 ];
101
+ // Return cache value
102
+ if (auto key = GenSourceId (hub_author, model_name);
103
+ src_cache_.find (key) != src_cache_.end ()) {
104
+ auto now = std::chrono::system_clock::now ();
105
+ if (std::chrono::duration_cast<std::chrono::seconds>(now -
106
+ src_cache_.at (key))
107
+ .count () < kModeSourceCacheSecs ) {
108
+ CTL_DBG (" Return cache value for model source: " << model_source);
109
+ return true ;
110
+ }
111
+ }
112
+
91
113
if (r.pathParams [0 ] == " cortexso" ) {
92
- return AddCortexsoRepo (model_source, author , model_name);
114
+ return AddCortexsoRepo (model_source, hub_author , model_name);
93
115
} else {
94
- return AddHfRepo (model_source, author , model_name);
116
+ return AddHfRepo (model_source, hub_author , model_name);
95
117
}
96
118
}
97
119
}
@@ -190,9 +212,9 @@ cpp::result<ModelSource, std::string> ModelSourceService::GetModelSource(
190
212
}
191
213
192
214
cpp::result<std::vector<std::string>, std::string>
193
- ModelSourceService::GetRepositoryList (std::string_view author ,
215
+ ModelSourceService::GetRepositoryList (std::string_view hub_author ,
194
216
std::string_view tag_filter) {
195
- std::string as (author );
217
+ std::string as (hub_author );
196
218
auto get_repo_list = [this , &as, &tag_filter] {
197
219
std::vector<std::string> repo_list;
198
220
auto const & mis = cortexso_repos_.at (as);
@@ -227,9 +249,9 @@ ModelSourceService::GetRepositoryList(std::string_view author,
227
249
}
228
250
229
251
cpp::result<bool , std::string> ModelSourceService::AddHfOrg (
230
- const std::string& model_source, const std::string& author ) {
252
+ const std::string& model_source, const std::string& hub_author ) {
231
253
auto res = curl_utils::SimpleGet (" https://huggingface.co/api/models?author=" +
232
- author );
254
+ hub_author );
233
255
if (res.has_value ()) {
234
256
auto models = ParseJsonString (res.value ());
235
257
// Add new models
@@ -238,9 +260,10 @@ cpp::result<bool, std::string> ModelSourceService::AddHfOrg(
238
260
239
261
auto author_model = string_utils::SplitBy (m.id , " /" );
240
262
if (author_model.size () == 2 ) {
241
- auto const & author = author_model[0 ];
263
+ auto const & hub_author = author_model[0 ];
242
264
auto const & model_name = author_model[1 ];
243
- auto r = AddHfRepo (model_source + " /" + model_name, author, model_name);
265
+ auto r =
266
+ AddHfRepo (model_source + " /" + model_name, hub_author, model_name);
244
267
if (r.has_error ()) {
245
268
CTL_WRN (r.error ());
246
269
}
@@ -253,14 +276,14 @@ cpp::result<bool, std::string> ModelSourceService::AddHfOrg(
253
276
}
254
277
255
278
cpp::result<bool , std::string> ModelSourceService::AddHfRepo (
256
- const std::string& model_source, const std::string& author ,
279
+ const std::string& model_source, const std::string& hub_author ,
257
280
const std::string& model_name) {
258
281
// Get models from db
259
282
260
283
auto model_list_before = db_service_->GetModels (model_source)
261
284
.value_or (std::vector<cortex::db::ModelEntry>{});
262
285
std::unordered_set<std::string> updated_model_list;
263
- auto add_res = AddRepoSiblings (model_source, author , model_name);
286
+ auto add_res = AddRepoSiblings (model_source, hub_author , model_name);
264
287
if (add_res.has_error ()) {
265
288
return cpp::fail (add_res.error ());
266
289
} else {
@@ -274,15 +297,17 @@ cpp::result<bool, std::string> ModelSourceService::AddHfRepo(
274
297
}
275
298
}
276
299
}
300
+ src_cache_[GenSourceId (hub_author, model_name)] =
301
+ std::chrono::system_clock::now ();
277
302
return true ;
278
303
}
279
304
280
305
cpp::result<std::unordered_set<std::string>, std::string>
281
306
ModelSourceService::AddRepoSiblings (const std::string& model_source,
282
- const std::string& author ,
307
+ const std::string& hub_author ,
283
308
const std::string& model_name) {
284
309
std::unordered_set<std::string> res;
285
- auto repo_info = hu::GetHuggingFaceModelRepoInfo (author , model_name);
310
+ auto repo_info = hu::GetHuggingFaceModelRepoInfo (hub_author , model_name);
286
311
if (repo_info.has_error ()) {
287
312
return cpp::fail (repo_info.error ());
288
313
}
@@ -293,14 +318,14 @@ ModelSourceService::AddRepoSiblings(const std::string& model_source,
293
318
" supported." );
294
319
}
295
320
296
- auto siblings_fs = hu::GetSiblingsFileSize (author , model_name);
321
+ auto siblings_fs = hu::GetSiblingsFileSize (hub_author , model_name);
297
322
298
323
if (siblings_fs.has_error ()) {
299
- return cpp::fail (" Could not get siblings file size: " + author + " / " +
300
- model_name);
324
+ return cpp::fail (" Could not get siblings file size: " +
325
+ GenSourceId (hub_author, model_name) );
301
326
}
302
327
303
- auto readme = hu::GetReadMe (author , model_name);
328
+ auto readme = hu::GetReadMe (hub_author , model_name);
304
329
std::string desc;
305
330
if (!readme.has_error ()) {
306
331
desc = readme.value ();
@@ -326,10 +351,10 @@ ModelSourceService::AddRepoSiblings(const std::string& model_source,
326
351
siblings_fs_v.file_sizes .at (sibling.rfilename ).size_in_bytes ;
327
352
}
328
353
std::string model_id =
329
- author + " :" + model_name + " :" + sibling.rfilename ;
354
+ hub_author + " :" + model_name + " :" + sibling.rfilename ;
330
355
cortex::db::ModelEntry e = {
331
356
.model = model_id,
332
- .author_repo_id = author ,
357
+ .author_repo_id = hub_author ,
333
358
.branch_name = " main" ,
334
359
.path_to_model_yaml = " " ,
335
360
.model_alias = " " ,
@@ -369,9 +394,9 @@ cpp::result<bool, std::string> ModelSourceService::AddCortexsoOrg(
369
394
CTL_INF (m.id );
370
395
auto author_model = string_utils::SplitBy (m.id , " /" );
371
396
if (author_model.size () == 2 ) {
372
- auto const & author = author_model[0 ];
397
+ auto const & hub_author = author_model[0 ];
373
398
auto const & model_name = author_model[1 ];
374
- auto r = AddCortexsoRepo (model_source + " /" + model_name, author ,
399
+ auto r = AddCortexsoRepo (model_source + " /" + model_name, hub_author ,
375
400
model_name);
376
401
if (r.has_error ()) {
377
402
CTL_WRN (r.error ());
@@ -386,7 +411,7 @@ cpp::result<bool, std::string> ModelSourceService::AddCortexsoOrg(
386
411
}
387
412
388
413
cpp::result<bool , std::string> ModelSourceService::AddCortexsoRepo (
389
- const std::string& model_source, const std::string& author ,
414
+ const std::string& model_source, const std::string& hub_author ,
390
415
const std::string& model_name) {
391
416
auto begin = std::chrono::system_clock::now ();
392
417
auto branches =
@@ -395,17 +420,23 @@ cpp::result<bool, std::string> ModelSourceService::AddCortexsoRepo(
395
420
return cpp::fail (branches.error ());
396
421
}
397
422
398
- auto repo_info = hu::GetHuggingFaceModelRepoInfo (author , model_name);
423
+ auto repo_info = hu::GetHuggingFaceModelRepoInfo (hub_author , model_name);
399
424
if (repo_info.has_error ()) {
400
425
return cpp::fail (repo_info.error ());
401
426
}
402
427
403
- auto readme = hu::GetReadMe (author , model_name);
428
+ auto readme = hu::GetReadMe (hub_author , model_name);
404
429
std::string desc;
405
430
if (!readme.has_error ()) {
406
431
desc = readme.value ();
407
432
}
408
433
434
+ auto author = hub_author;
435
+ if (auto model_author = hu::GetModelAuthorCortexsoHub (model_name);
436
+ model_author.has_value () && !model_author->empty ()) {
437
+ author = *model_author;
438
+ }
439
+
409
440
// Get models from db
410
441
auto model_list_before = db_service_->GetModels (model_source)
411
442
.value_or (std::vector<cortex::db::ModelEntry>{});
@@ -442,6 +473,8 @@ cpp::result<bool, std::string> ModelSourceService::AddCortexsoRepo(
442
473
" Duration ms: " << std::chrono::duration_cast<std::chrono::milliseconds>(
443
474
end - begin)
444
475
.count ());
476
+ src_cache_[GenSourceId (hub_author, model_name)] =
477
+ std::chrono::system_clock::now ();
445
478
return true ;
446
479
}
447
480
0 commit comments