@@ -65,6 +65,7 @@ class Model:
65
65
model_name : str | None
66
66
metadata_override : Path | None
67
67
dir_model_card : Path
68
+ remote_hf_model_id : str | None
68
69
69
70
# subclasses should define this!
70
71
model_arch : gguf .MODEL_ARCH
@@ -73,7 +74,7 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
73
74
use_temp_file : bool = False , eager : bool = False ,
74
75
metadata_override : Path | None = None , model_name : str | None = None ,
75
76
split_max_tensors : int = 0 , split_max_size : int = 0 , dry_run : bool = False ,
76
- small_first_shard : bool = False , hparams : dict [str , Any ] | None = None ):
77
+ small_first_shard : bool = False , hparams : dict [str , Any ] | None = None , remote_hf_model_id : str | None = None ):
77
78
if type (self ) is Model :
78
79
raise TypeError (f"{ type (self ).__name__ !r} should not be directly instantiated" )
79
80
@@ -83,11 +84,24 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
83
84
self .is_big_endian = is_big_endian
84
85
self .endianess = gguf .GGUFEndian .BIG if is_big_endian else gguf .GGUFEndian .LITTLE
85
86
self .use_temp_file = use_temp_file
86
- self .lazy = not eager
87
- self .part_names = Model .get_model_part_names (self .dir_model , "model" , ".safetensors" )
88
- self .is_safetensors = len (self .part_names ) > 0
89
- if not self .is_safetensors :
90
- self .part_names = Model .get_model_part_names (self .dir_model , "pytorch_model" , ".bin" )
87
+ self .lazy = not eager or (remote_hf_model_id is not None )
88
+ self .remote_hf_model_id = remote_hf_model_id
89
+ if remote_hf_model_id is not None :
90
+ self .is_safetensors = True
91
+
92
def get_remote_tensors() -> Iterator[tuple[str, Tensor]]:
    """Yield ``(name, lazy tensor)`` pairs for the remote Hugging Face model.

    Logs the model id, obtains the tensor listing through
    ``gguf.utility.SafetensorRemote.get_list_tensors_hf_model``, records the
    tensor names on ``self.tensor_names``, and wraps every listed entry with
    ``LazyTorchTensor.from_remote_tensor``.
    """
    logger.info(f"Using remote model with HuggingFace id: {remote_hf_model_id} ")
    # Fetch the remote listing once and reuse it for both the name set and
    # the iteration, instead of requesting the same listing a second time.
    remote_tensors = gguf.utility.SafetensorRemote.get_list_tensors_hf_model(remote_hf_model_id)
    self.tensor_names = set(remote_tensors)
    for name, remote_tensor in remote_tensors.items():
        yield (name, LazyTorchTensor.from_remote_tensor(remote_tensor))
98
+
99
+ self .get_tensors = get_remote_tensors
100
+ else :
101
+ self .part_names = Model .get_model_part_names (self .dir_model , "model" , ".safetensors" )
102
+ self .is_safetensors = len (self .part_names ) > 0
103
+ if not self .is_safetensors :
104
+ self .part_names = Model .get_model_part_names (self .dir_model , "pytorch_model" , ".bin" )
91
105
self .hparams = Model .load_hparams (self .dir_model ) if hparams is None else hparams
92
106
self .block_count = self .find_hparam (["n_layers" , "num_hidden_layers" , "n_layer" , "num_layers" ])
93
107
self .tensor_map = gguf .get_tensor_name_map (self .model_arch , self .block_count )
@@ -393,6 +407,10 @@ def prepare_metadata(self, vocab_only: bool):
393
407
394
408
self .metadata = gguf .Metadata .load (self .metadata_override , self .dir_model_card , self .model_name , total_params )
395
409
410
+ # If we are using HF model id, set the metadata name to the model id
411
+ if self .remote_hf_model_id :
412
+ self .metadata .name = self .remote_hf_model_id
413
+
396
414
# Fallback to model directory name if metadata name is still missing
397
415
if self .metadata .name is None :
398
416
self .metadata .name = self .dir_model .name
@@ -1788,10 +1806,6 @@ def set_gguf_parameters(self):
1788
1806
self .gguf_writer .add_expert_feed_forward_length (self .hparams ["intermediate_size_moe" ])
1789
1807
1790
1808
def modify_tensors (self , data_torch : Tensor , name : str , bid : int | None ):
1791
- name = name .replace ("language_model." , "" )
1792
- name = name .replace ("feed_forward." , "mlp." ) # a bit hacky for now
1793
- name = name .replace (".router.weight" , ".gate.weight" ) # a bit hacky for now
1794
-
1795
1809
# split the gate_up into gate and up
1796
1810
if "gate_up_proj" in name :
1797
1811
name_up = name .replace ("gate_up_proj" , "up_proj.weight" )
@@ -5403,6 +5417,14 @@ def from_safetensors_slice(cls, st_slice: Any) -> Tensor:
5403
5417
lazy = cls (meta = cls .meta_with_dtype_and_shape (dtype , shape ), args = (st_slice ,), func = lambda s : s [:])
5404
5418
return cast (torch .Tensor , lazy )
5405
5419
5420
@classmethod
def from_remote_tensor(cls, remote_tensor: gguf.utility.RemoteTensor):
    """Wrap a remote tensor descriptor in a lazy tensor.

    The meta tensor is built from the descriptor's dtype string (looked up in
    ``cls._dtype_str_map``) and its shape. ``remote_tensor.data()`` is only
    called inside the function handed to the lazy wrapper.
    """
    torch_dtype = cls._dtype_str_map[remote_tensor.dtype]
    dims = remote_tensor.shape

    def _load(remote):
        # Reinterpret the buffer returned by data() with the mapped dtype,
        # then restore the declared shape.
        return torch.frombuffer(remote.data(), dtype=torch_dtype).reshape(dims)

    placeholder = cls.meta_with_dtype_and_shape(torch_dtype, dims)
    return cast(torch.Tensor, cls(meta=placeholder, args=(remote_tensor,), func=_load))
5427
+
5406
5428
@classmethod
5407
5429
def __torch_function__ (cls , func , types , args = (), kwargs = None ):
5408
5430
del types # unused
@@ -5480,6 +5502,10 @@ def parse_args() -> argparse.Namespace:
5480
5502
"--print-supported-models" , action = "store_true" ,
5481
5503
help = "Print the supported models"
5482
5504
)
5505
+ parser .add_argument (
5506
+ "--remote" , action = "store_true" ,
5507
+ help = "(Experimental) Read safetensors file remotely without downloading to disk. Config and tokenizer files will still be downloaded. To use this feature, you need to specify Hugging Face model repo name instead of a local directory. For example: 'HuggingFaceTB/SmolLM2-1.7B-Instruct'. Note: To access gated repo, set HF_TOKEN environment variable to your Hugging Face token." ,
5508
+ )
5483
5509
5484
5510
args = parser .parse_args ()
5485
5511
if not args .print_supported_models and args .model is None :
@@ -5520,6 +5546,14 @@ def main() -> None:
5520
5546
5521
5547
dir_model = args .model
5522
5548
5549
+ if args .remote :
5550
+ from huggingface_hub import snapshot_download
5551
+ local_dir = snapshot_download (
5552
+ repo_id = str (dir_model ),
5553
+ allow_patterns = ["LICENSE" , "*.json" , "*.md" , "*.txt" , "tokenizer.model" ])
5554
+ dir_model = Path (local_dir )
5555
+ logger .info (f"Downloaded config and tokenizer to { local_dir } " )
5556
+
5523
5557
if not dir_model .is_dir ():
5524
5558
logger .error (f'Error: { args .model } is not a directory' )
5525
5559
sys .exit (1 )
@@ -5541,6 +5575,9 @@ def main() -> None:
5541
5575
5542
5576
if args .outfile is not None :
5543
5577
fname_out = args .outfile
5578
+ elif args .remote :
5579
+ # if remote, use the model ID as the output file name
5580
+ fname_out = Path ("./" + str (args .model ).replace ("/" , "-" ) + "-{ftype}.gguf" )
5544
5581
else :
5545
5582
fname_out = dir_model
5546
5583
@@ -5564,7 +5601,8 @@ def main() -> None:
5564
5601
metadata_override = args .metadata , model_name = args .model_name ,
5565
5602
split_max_tensors = args .split_max_tensors ,
5566
5603
split_max_size = split_str_to_n_bytes (args .split_max_size ), dry_run = args .dry_run ,
5567
- small_first_shard = args .no_tensor_first_split )
5604
+ small_first_shard = args .no_tensor_first_split ,
5605
+ remote_hf_model_id = str (args .model ) if args .remote else None )
5568
5606
5569
5607
if args .vocab_only :
5570
5608
logger .info ("Exporting model vocab..." )
0 commit comments