File tree: 1 file changed — 0 insertions(+), 24 deletions(-)
lines changed Original file line number Diff line number Diff line change @@ -568,30 +568,6 @@ def set_gguf_parameters(self):
568
568
self .gguf_writer .add_layer_norm_eps (self .hparams ["layer_norm_epsilon" ])
569
569
self .gguf_writer .add_file_type (self .ftype )
570
570
571
- def set_vocab (self ):
572
- tokens = []
573
- scores = []
574
- toktypes = []
575
-
576
- from transformers import AutoTokenizer
577
- tokenizer = AutoTokenizer .from_pretrained (self .dir_model )
578
- vocab_size = self .hparams .get ("vocab_size" , len (tokenizer .vocab ))
579
- assert max (tokenizer .vocab .values ()) < vocab_size
580
-
581
- reverse_vocab = {id : encoded_tok for encoded_tok , id in tokenizer .vocab .items ()}
582
-
583
- for i in range (vocab_size ):
584
- tokens .append (reverse_vocab [i ])
585
- scores .append (0.0 ) # dummy
586
- toktypes .append (gguf .TokenType .NORMAL )
587
-
588
- self .gguf_writer .add_token_list (tokens )
589
- self .gguf_writer .add_token_scores (scores )
590
- self .gguf_writer .add_token_types (toktypes )
591
-
592
- special_vocab = gguf .SpecialVocab (self .dir_model , load_merges = True , n_vocab = len (tokens ))
593
- special_vocab .add_to_gguf (self .gguf_writer )
594
-
595
571
def write_tensors (self ):
596
572
block_count = self .hparams .get ("num_hidden_layers" )
597
573
if block_count is None :
You can’t perform that action at this time.
0 commit comments