@@ -47,95 +47,96 @@ def write_tensor_header(fout: BinaryIO, name: str, shape: Sequence[int], data_ty
     fout.seek((fout.tell() + 31) & -32)
 
 
-if len(sys.argv) < 2:
-    print(f"Usage: python {sys.argv[0]} <path> [arch]")
-    print(
-        "Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'"
-    )
-    print(f"Arch must be one of {list(gguf.MODEL_ARCH_NAMES.values())} (default: llama)")
-    sys.exit(1)
-
-input_json = os.path.join(sys.argv[1], "adapter_config.json")
-input_model = os.path.join(sys.argv[1], "adapter_model.bin")
-output_path = os.path.join(sys.argv[1], "ggml-adapter-model.bin")
-
-model = torch.load(input_model, map_location="cpu")
-arch_name = sys.argv[2] if len(sys.argv) == 3 else "llama"
-
-if arch_name not in gguf.MODEL_ARCH_NAMES.values():
-    print(f"Error: unsupported architecture {arch_name}")
-    sys.exit(1)
-
-arch = list(gguf.MODEL_ARCH_NAMES.keys())[list(gguf.MODEL_ARCH_NAMES.values()).index(arch_name)]
-name_map = gguf.TensorNameMap(arch, 200)  # 200 layers ought to be enough for anyone
-
-with open(input_json, "r") as f:
-    params = json.load(f)
-
-if params["peft_type"] != "LORA":
-    print(f"Error: unsupported adapter type {params['peft_type']}, expected LORA")
-    sys.exit(1)
-
-if params["fan_in_fan_out"] is True:
-    print("Error: param fan_in_fan_out is not supported")
-    sys.exit(1)
-
-if params["bias"] is not None and params["bias"] != "none":
-    print("Error: param bias is not supported")
-    sys.exit(1)
-
-# TODO: these seem to be layers that have been trained but without lora.
-# doesn't seem widely used but eventually should be supported
-if params["modules_to_save"] is not None and len(params["modules_to_save"]) > 0:
-    print("Error: param modules_to_save is not supported")
-    sys.exit(1)
-
-with open(output_path, "wb") as fout:
-    fout.truncate()
-
-    write_file_header(fout, params)
-    for k, v in model.items():
-        orig_k = k
-        if k.endswith(".default.weight"):
-            k = k.replace(".default.weight", ".weight")
-        if k in ["llama_proj.weight", "llama_proj.bias"]:
-            continue
-        if k.endswith("lora_A.weight"):
-            if v.dtype != torch.float16 and v.dtype != torch.float32:
+if __name__ == '__main__':
+    if len(sys.argv) < 2:
+        print(f"Usage: python {sys.argv[0]} <path> [arch]")
+        print(
+            "Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'"
+        )
+        print(f"Arch must be one of {list(gguf.MODEL_ARCH_NAMES.values())} (default: llama)")
+        sys.exit(1)
+
+    input_json = os.path.join(sys.argv[1], "adapter_config.json")
+    input_model = os.path.join(sys.argv[1], "adapter_model.bin")
+    output_path = os.path.join(sys.argv[1], "ggml-adapter-model.bin")
+
+    model = torch.load(input_model, map_location="cpu")
+    arch_name = sys.argv[2] if len(sys.argv) == 3 else "llama"
+
+    if arch_name not in gguf.MODEL_ARCH_NAMES.values():
+        print(f"Error: unsupported architecture {arch_name}")
+        sys.exit(1)
+
+    arch = list(gguf.MODEL_ARCH_NAMES.keys())[list(gguf.MODEL_ARCH_NAMES.values()).index(arch_name)]
+    name_map = gguf.TensorNameMap(arch, 200)  # 200 layers ought to be enough for anyone
+
+    with open(input_json, "r") as f:
+        params = json.load(f)
+
+    if params["peft_type"] != "LORA":
+        print(f"Error: unsupported adapter type {params['peft_type']}, expected LORA")
+        sys.exit(1)
+
+    if params["fan_in_fan_out"] is True:
+        print("Error: param fan_in_fan_out is not supported")
+        sys.exit(1)
+
+    if params["bias"] is not None and params["bias"] != "none":
+        print("Error: param bias is not supported")
+        sys.exit(1)
+
+    # TODO: these seem to be layers that have been trained but without lora.
+    # doesn't seem widely used but eventually should be supported
+    if params["modules_to_save"] is not None and len(params["modules_to_save"]) > 0:
+        print("Error: param modules_to_save is not supported")
+        sys.exit(1)
+
+    with open(output_path, "wb") as fout:
+        fout.truncate()
+
+        write_file_header(fout, params)
+        for k, v in model.items():
+            orig_k = k
+            if k.endswith(".default.weight"):
+                k = k.replace(".default.weight", ".weight")
+            if k in ["llama_proj.weight", "llama_proj.bias"]:
+                continue
+            if k.endswith("lora_A.weight"):
+                if v.dtype != torch.float16 and v.dtype != torch.float32:
+                    v = v.float()
+                v = v.T
+            else:
                 v = v.float()
-            v = v.T
-        else:
-            v = v.float()
-
-        t = v.detach().numpy()
-
-        prefix = "base_model.model."
-        if k.startswith(prefix):
-            k = k[len(prefix) :]
-
-        lora_suffixes = (".lora_A.weight", ".lora_B.weight")
-        if k.endswith(lora_suffixes):
-            suffix = k[-len(lora_suffixes[0]):]
-            k = k[: -len(lora_suffixes[0])]
-        else:
-            print(f"Error: unrecognized tensor name {orig_k}")
-            sys.exit(1)
-
-        tname = name_map.get_name(k)
-        if tname is None:
-            print(f"Error: could not map tensor name {orig_k}")
-            print(" Note: the arch parameter must be specified if the model is not llama")
-            sys.exit(1)
-
-        if suffix == ".lora_A.weight":
-            tname += ".weight.loraA"
-        elif suffix == ".lora_B.weight":
-            tname += ".weight.loraB"
-        else:
-            assert False
-
-        print(f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes / 1024 / 1024:.2f}MB")
-        write_tensor_header(fout, tname, t.shape, t.dtype)
-        t.tofile(fout)
-
-print(f"Converted {input_json} and {input_model} to {output_path}")
+
+            t = v.detach().numpy()
+
+            prefix = "base_model.model."
+            if k.startswith(prefix):
+                k = k[len(prefix) :]
+
+            lora_suffixes = (".lora_A.weight", ".lora_B.weight")
+            if k.endswith(lora_suffixes):
+                suffix = k[-len(lora_suffixes[0]):]
+                k = k[: -len(lora_suffixes[0])]
+            else:
+                print(f"Error: unrecognized tensor name {orig_k}")
+                sys.exit(1)
+
+            tname = name_map.get_name(k)
+            if tname is None:
+                print(f"Error: could not map tensor name {orig_k}")
+                print(" Note: the arch parameter must be specified if the model is not llama")
+                sys.exit(1)
+
+            if suffix == ".lora_A.weight":
+                tname += ".weight.loraA"
+            elif suffix == ".lora_B.weight":
+                tname += ".weight.loraB"
+            else:
+                assert False
+
+            print(f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes / 1024 / 1024:.2f}MB")
+            write_tensor_header(fout, tname, t.shape, t.dtype)
+            t.tofile(fout)
+
+    print(f"Converted {input_json} and {input_model} to {output_path}")
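
Note: the change is purely structural; apart from being nested one level deeper under the if __name__ == '__main__': guard, the script body is unchanged. Module-level statements run on import, so the guard lets other code import this file for its helpers without kicking off the conversion. A minimal, self-contained sketch of the pattern (the file and function names here are illustrative, not taken from the converter):

import sys

def convert(path: str) -> None:
    # Stand-in for the real conversion logic.
    print(f"converting {path}")

if __name__ == '__main__':
    # Runs only when executed as a script, e.g. "python sketch.py <path>",
    # not when another module does "import sketch" to reuse convert().
    convert(sys.argv[1])

Per the usage string in the diff, the converter itself is still invoked the same way: python <script> <path> [arch], where <path> is a directory containing adapter_config.json and adapter_model.bin.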
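A side note on the unchanged context line at the top of the hunk: fout.seek((fout.tell() + 31) & -32) pads each tensor header out to a 32-byte boundary before the tensor data is written. A quick standalone check of that arithmetic (the helper name is mine, not from the file):

def align_up_32(offset: int) -> int:
    # Adding 31 and masking with -32 (two's complement: all ones except
    # the low five bits) rounds offset up to the next multiple of 32.
    return (offset + 31) & -32

assert align_up_32(0) == 0
assert align_up_32(1) == 32
assert align_up_32(32) == 32
assert align_up_32(33) == 64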