@@ -47,95 +47,96 @@ def write_tensor_header(fout: BinaryIO, name: str, shape: Sequence[int], data_ty
     fout.seek((fout.tell() + 31) & -32)
 
 
-if len(sys.argv) < 2:
-    print(f"Usage: python {sys.argv[0]} <path> [arch]")
-    print(
-        "Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'"
-    )
-    print(f"Arch must be one of {list(gguf.MODEL_ARCH_NAMES.values())} (default: llama)")
-    sys.exit(1)
-
-input_json = os.path.join(sys.argv[1], "adapter_config.json")
-input_model = os.path.join(sys.argv[1], "adapter_model.bin")
-output_path = os.path.join(sys.argv[1], "ggml-adapter-model.bin")
-
-model = torch.load(input_model, map_location="cpu")
-arch_name = sys.argv[2] if len(sys.argv) == 3 else "llama"
-
-if arch_name not in gguf.MODEL_ARCH_NAMES.values():
-    print(f"Error: unsupported architecture {arch_name}")
-    sys.exit(1)
-
-arch = list(gguf.MODEL_ARCH_NAMES.keys())[list(gguf.MODEL_ARCH_NAMES.values()).index(arch_name)]
-name_map = gguf.TensorNameMap(arch, 200)  # 200 layers ought to be enough for anyone
-
-with open(input_json, "r") as f:
-    params = json.load(f)
-
-if params["peft_type"] != "LORA":
-    print(f"Error: unsupported adapter type {params['peft_type']}, expected LORA")
-    sys.exit(1)
-
-if params["fan_in_fan_out"] is True:
-    print("Error: param fan_in_fan_out is not supported")
-    sys.exit(1)
-
-if params["bias"] is not None and params["bias"] != "none":
-    print("Error: param bias is not supported")
-    sys.exit(1)
-
-# TODO: these seem to be layers that have been trained but without lora.
-# doesn't seem widely used but eventually should be supported
-if params["modules_to_save"] is not None and len(params["modules_to_save"]) > 0:
-    print("Error: param modules_to_save is not supported")
-    sys.exit(1)
-
-with open(output_path, "wb") as fout:
-    fout.truncate()
-
-    write_file_header(fout, params)
-    for k, v in model.items():
-        orig_k = k
-        if k.endswith(".default.weight"):
-            k = k.replace(".default.weight", ".weight")
-        if k in ["llama_proj.weight", "llama_proj.bias"]:
-            continue
-        if k.endswith("lora_A.weight"):
-            if v.dtype != torch.float16 and v.dtype != torch.float32:
+if __name__ == '__main__':
+    if len(sys.argv) < 2:
+        print(f"Usage: python {sys.argv[0]} <path> [arch]")
+        print(
+            "Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'"
+        )
+        print(f"Arch must be one of {list(gguf.MODEL_ARCH_NAMES.values())} (default: llama)")
+        sys.exit(1)
+
+    input_json = os.path.join(sys.argv[1], "adapter_config.json")
+    input_model = os.path.join(sys.argv[1], "adapter_model.bin")
+    output_path = os.path.join(sys.argv[1], "ggml-adapter-model.bin")
+
+    model = torch.load(input_model, map_location="cpu")
+    arch_name = sys.argv[2] if len(sys.argv) == 3 else "llama"
+
+    if arch_name not in gguf.MODEL_ARCH_NAMES.values():
+        print(f"Error: unsupported architecture {arch_name}")
+        sys.exit(1)
+
+    arch = list(gguf.MODEL_ARCH_NAMES.keys())[list(gguf.MODEL_ARCH_NAMES.values()).index(arch_name)]
+    name_map = gguf.TensorNameMap(arch, 200)  # 200 layers ought to be enough for anyone
+
+    with open(input_json, "r") as f:
+        params = json.load(f)
+
+    if params["peft_type"] != "LORA":
+        print(f"Error: unsupported adapter type {params['peft_type']}, expected LORA")
+        sys.exit(1)
+
+    if params["fan_in_fan_out"] is True:
+        print("Error: param fan_in_fan_out is not supported")
+        sys.exit(1)
+
+    if params["bias"] is not None and params["bias"] != "none":
+        print("Error: param bias is not supported")
+        sys.exit(1)
+
+    # TODO: these seem to be layers that have been trained but without lora.
+    # doesn't seem widely used but eventually should be supported
+    if params["modules_to_save"] is not None and len(params["modules_to_save"]) > 0:
+        print("Error: param modules_to_save is not supported")
+        sys.exit(1)
+
+    with open(output_path, "wb") as fout:
+        fout.truncate()
+
+        write_file_header(fout, params)
+        for k, v in model.items():
+            orig_k = k
+            if k.endswith(".default.weight"):
+                k = k.replace(".default.weight", ".weight")
+            if k in ["llama_proj.weight", "llama_proj.bias"]:
+                continue
+            if k.endswith("lora_A.weight"):
+                if v.dtype != torch.float16 and v.dtype != torch.float32:
+                    v = v.float()
+                v = v.T
+            else:
                 v = v.float()
-            v = v.T
-        else:
-            v = v.float()
-
-        t = v.detach().numpy()
-
-        prefix = "base_model.model."
-        if k.startswith(prefix):
-            k = k[len(prefix) :]
-
-        lora_suffixes = (".lora_A.weight", ".lora_B.weight")
-        if k.endswith(lora_suffixes):
-            suffix = k[-len(lora_suffixes[0]):]
-            k = k[: -len(lora_suffixes[0])]
-        else:
-            print(f"Error: unrecognized tensor name {orig_k}")
-            sys.exit(1)
-
-        tname = name_map.get_name(k)
-        if tname is None:
-            print(f"Error: could not map tensor name {orig_k}")
-            print(" Note: the arch parameter must be specified if the model is not llama")
-            sys.exit(1)
-
-        if suffix == ".lora_A.weight":
-            tname += ".weight.loraA"
-        elif suffix == ".lora_B.weight":
-            tname += ".weight.loraB"
-        else:
-            assert False
-
-        print(f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB")
-        write_tensor_header(fout, tname, t.shape, t.dtype)
-        t.tofile(fout)
-
-print(f"Converted {input_json} and {input_model} to {output_path}")
+
+            t = v.detach().numpy()
+
+            prefix = "base_model.model."
+            if k.startswith(prefix):
+                k = k[len(prefix) :]
+
+            lora_suffixes = (".lora_A.weight", ".lora_B.weight")
+            if k.endswith(lora_suffixes):
+                suffix = k[-len(lora_suffixes[0]):]
+                k = k[: -len(lora_suffixes[0])]
+            else:
+                print(f"Error: unrecognized tensor name {orig_k}")
+                sys.exit(1)
+
+            tname = name_map.get_name(k)
+            if tname is None:
+                print(f"Error: could not map tensor name {orig_k}")
+                print(" Note: the arch parameter must be specified if the model is not llama")
+                sys.exit(1)
+
+            if suffix == ".lora_A.weight":
+                tname += ".weight.loraA"
+            elif suffix == ".lora_B.weight":
+                tname += ".weight.loraB"
+            else:
+                assert False
+
+            print(f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB")
+            write_tensor_header(fout, tname, t.shape, t.dtype)
+            t.tofile(fout)
+
+    print(f"Converted {input_json} and {input_model} to {output_path}")
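For reference, the untouched context line at the top of the hunk, fout.seek((fout.tell() + 31) & -32), pads the file position up to the next 32-byte boundary before tensor data is written. A small self-contained sketch of that rounding trick, using example values of my own rather than anything from the PR:

def align32(n: int) -> int:
    # Adding 31 and masking off the low five bits rounds n up to a
    # multiple of 32; values already aligned are left unchanged.
    return (n + 31) & -32

assert align32(100) == 128
assert align32(128) == 128
assert align32(129) == 160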