Skip to content

Commit e86a5e6

Browse files
committed
modified: EduNLP/SIF/tokenization/text/tokenization.py
1 parent 767778f commit e86a5e6

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

EduNLP/SIF/tokenization/text/tokenization.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,7 @@ def tokenize(text,
100 100 |
101 101 |   elif (tokenizer == 'bpe'):
102 102 |       try:
103     | -         tokenizer = HGTokenizer.from_file('bpeTokenizer.json')
    103 | +         tokenizer = HGTokenizer.from_file(bpe_json)
104 104 |       except Exception:
105 105 |           tokenizer = huggingface_tokenizer.Tokenizer(
106 106 |               huggingface_tokenizer.models.BPE())
@@ -109,7 +109,7 @@ def tokenize(text,
109 109 |   trainer = BpeTrainer(
110 110 |       special_tokens=["[UNK]", "[CLS]", "[SEP]", "[PAD]", "[MASK]"])
111 111 |   tokenizer.train(files=[bpe_trainfile], trainer=trainer)
112     | - tokenizer.save('bpeTokenizer.json', pretty=True)
    112 | + tokenizer.save(bpe_json, pretty=True)
113 113 |   output = tokenizer.encode(text)
114 114 |   output = output.tokens
115 115 |   return output[0]

0 commit comments

Comments
 (0)