Commit fcae724

Revert "Temporary revert to old vocab conversion for falcon"
This reverts commit 63dd07a.
1 parent: 80de47c

File tree

1 file changed: +0 -24


model.py (-24)

@@ -568,30 +568,6 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
         self.gguf_writer.add_file_type(self.ftype)
 
-    def set_vocab(self):
-        tokens = []
-        scores = []
-        toktypes = []
-
-        from transformers import AutoTokenizer
-        tokenizer = AutoTokenizer.from_pretrained(self.dir_model)
-        vocab_size = self.hparams.get("vocab_size", len(tokenizer.vocab))
-        assert max(tokenizer.vocab.values()) < vocab_size
-
-        reverse_vocab = {id: encoded_tok for encoded_tok, id in tokenizer.vocab.items()}
-
-        for i in range(vocab_size):
-            tokens.append(reverse_vocab[i])
-            scores.append(0.0) # dummy
-            toktypes.append(gguf.TokenType.NORMAL)
-
-        self.gguf_writer.add_token_list(tokens)
-        self.gguf_writer.add_token_scores(scores)
-        self.gguf_writer.add_token_types(toktypes)
-
-        special_vocab = gguf.SpecialVocab(self.dir_model, load_merges = True, n_vocab = len(tokens))
-        special_vocab.add_to_gguf(self.gguf_writer)
-
     def write_tensors(self):
         block_count = self.hparams.get("num_hidden_layers")
         if block_count is None:

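For reference, the override removed here built Falcon's vocab through Hugging Face's AutoTokenizer and handed it to the GGUF writer. Below is a minimal standalone sketch that replays the same steps outside the Model class; MODEL_DIR, OUT_FILE, and the idea of writing a vocab-only GGUF file are illustrative assumptions, not part of this commit, and the gguf package assumed is the one shipped alongside llama.cpp.

```python
# Standalone sketch of the removed set_vocab override. Assumptions:
# MODEL_DIR points at a local Falcon checkout, OUT_FILE is hypothetical,
# and the gguf package accepts SpecialVocab(..., n_vocab=...).
import gguf
from transformers import AutoTokenizer

MODEL_DIR = "path/to/falcon"    # hypothetical local HF model directory
OUT_FILE = "falcon-vocab.gguf"  # hypothetical output path

tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
# The removed code preferred hparams["vocab_size"] when present;
# falling back to the tokenizer's vocab size is enough for this sketch.
vocab_size = len(tokenizer.vocab)
assert max(tokenizer.vocab.values()) < vocab_size

# Invert token -> id so tokens can be emitted in id order.
reverse_vocab = {tok_id: tok for tok, tok_id in tokenizer.vocab.items()}

tokens = [reverse_vocab[i] for i in range(vocab_size)]
scores = [0.0] * vocab_size                      # dummy scores, as in the removed code
toktypes = [gguf.TokenType.NORMAL] * vocab_size  # every token marked NORMAL

writer = gguf.GGUFWriter(OUT_FILE, "falcon")
writer.add_token_list(tokens)
writer.add_token_scores(scores)
writer.add_token_types(toktypes)

# Pull special token ids and BPE merges from the model directory,
# mirroring the SpecialVocab call in the removed override.
special_vocab = gguf.SpecialVocab(MODEL_DIR, load_merges=True, n_vocab=len(tokens))
special_vocab.add_to_gguf(writer)

writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.close()
```

The dummy 0.0 scores and uniform NORMAL token types mirror the removed code: a BPE vocab carries no per-token scores, so placeholders are written to satisfy the expected GGUF keys.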