@@ -537,3 +537,96 @@ def write_tensors(self):
            print(name + " -> " + new_name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data.dtype))
            self.gguf_writer.add_tensor(new_name, data)

+
+class FalconModel(Model):
+    def set_gguf_parameters(self):
+        block_count = self.hparams.get("num_hidden_layers")
+        if block_count is None:
+            block_count = self.hparams["n_layer"]  # old name
+
+        n_head = self.hparams.get("num_attention_heads")
+        if n_head is None:
+            n_head = self.hparams["n_head"]  # old name
+
+        n_head_kv = self.hparams.get("num_kv_heads")
+        if n_head_kv is None:
+            n_head_kv = self.hparams.get("n_head_kv", 1)  # old name
+
+        self.gguf_writer.add_name("Falcon")
+        self.gguf_writer.add_context_length(2048)  # not in config.json
+        self.gguf_writer.add_tensor_data_layout("jploski")  # qkv tensor transform
+        self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
+        self.gguf_writer.add_feed_forward_length(4 * self.hparams["hidden_size"])
+        self.gguf_writer.add_block_count(block_count)
+        self.gguf_writer.add_head_count(n_head)
+        self.gguf_writer.add_head_count_kv(n_head_kv)
+        self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
+        self.gguf_writer.add_file_type(self.ftype)
+
+    def write_tensors(self):
+        block_count = self.hparams.get("num_hidden_layers")
+        if block_count is None:
+            block_count = self.hparams["n_layer"]  # old name
+
+        n_head = self.hparams.get("num_attention_heads")
+        if n_head is None:
+            n_head = self.hparams["n_head"]  # old name
+
+        n_head_kv = self.hparams.get("num_kv_heads")
+        if n_head_kv is None:
+            n_head_kv = self.hparams.get("n_head_kv", 1)  # old name
+
+        head_dim = self.hparams["hidden_size"] // n_head
+        tensor_map = gguf.get_tensor_name_map(self.model_arch, block_count)
+
+        for name, data in self.get_tensors():
+            old_dtype = data.dtype
+
+            # convert any unsupported data types to float32
+            if data.dtype != torch.float16 and data.dtype != torch.float32:
+                data = data.to(torch.float32)
+
+            # QKV tensor transform
+            # The original query_key_value tensor contains n_head_kv "kv groups",
+            # each consisting of n_head/n_head_kv query weights followed by one key
+            # and one value weight (shared by all query heads in the kv group).
+            # This layout makes it a big pain to work with in GGML.
+            # So we rearrange them here, so that we have n_head query weights
+            # followed by n_head_kv key weights followed by n_head_kv value weights,
+            # in contiguous fashion.
+            # ref: https://github.com/jploski/ggml/blob/falcon40b/examples/falcon/convert-hf-to-ggml.py
+
+            if "query_key_value" in name:
+                qkv = data.view(n_head_kv, n_head // n_head_kv + 2, head_dim, head_dim * n_head)
+                q = qkv[:, :-2].reshape(n_head * head_dim, head_dim * n_head)
+                k = qkv[:, [-2]].reshape(n_head_kv * head_dim, head_dim * n_head)
+                v = qkv[:, [-1]].reshape(n_head_kv * head_dim, head_dim * n_head)
+                data = torch.cat((q, k, v)).reshape_as(data)
+
+            data = data.squeeze().numpy()
+
+            # map tensor names
+            new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
+            if new_name is None:
+                print("Can not map tensor '" + name + "'")
+                sys.exit()
+
+            n_dims = len(data.shape)
+            data_dtype = data.dtype
+
+            # if f32 desired, convert any float16 to float32
+            if self.ftype == 0 and data_dtype == np.float16:
+                data = data.astype(np.float32)
+
+            # TODO: Why can't we use these float16 as-is? There should be no reason to store float16 as float32
+            if self.ftype == 1 and data_dtype == np.float16 and n_dims == 1:
+                data = data.astype(np.float32)
+
+            # if f16 desired, convert any float32 2-dim weight tensors to float16
+            if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
+                data = data.astype(np.float16)
+
+            print(new_name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data.dtype))
+
+            self.gguf_writer.add_tensor(new_name, data)
+
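For reference, here is a minimal standalone sketch of the QKV rearrangement performed in FalconModel.write_tensors above. It is not part of the commit; the head counts and sizes (n_head=8, n_head_kv=2, head_dim=4) are made up purely for illustration, and the snippet only demonstrates that the reshape produces a tensor of the same shape with the rows regrouped as all queries, then keys, then values.

import torch

# hypothetical Falcon-like sizes, chosen only for illustration
n_head, n_head_kv, head_dim = 8, 2, 4
hidden = n_head * head_dim
# fused query_key_value weight: (n_head + 2 * n_head_kv) * head_dim rows
data = torch.randn((n_head + 2 * n_head_kv) * head_dim, hidden)

# split into n_head_kv groups of (queries..., key, value) and regroup as Q | K | V
qkv = data.view(n_head_kv, n_head // n_head_kv + 2, head_dim, hidden)
q = qkv[:, :-2].reshape(n_head * head_dim, hidden)      # all query heads first
k = qkv[:, [-2]].reshape(n_head_kv * head_dim, hidden)  # then the shared keys
v = qkv[:, [-1]].reshape(n_head_kv * head_dim, hidden)  # then the shared values
out = torch.cat((q, k, v)).reshape_as(data)

print(out.shape)  # torch.Size([48, 32]) -- same shape as the input, rows reordered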