Skip to content

Commit 767778f

Browse files
committed
modified: EduNLP/SIF/tokenization/text/tokenization.py
modified: tests/test_tokenizer/test_tokenizer.py
1 parent 05172b4 commit 767778f

File tree

2 files changed

+3
-3
lines changed

2 files changed

+3
-3
lines changed

EduNLP/SIF/tokenization/text/tokenization.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -101,7 +101,7 @@ def tokenize(text,
101101
elif (tokenizer == 'bpe'):
102102
try:
103103
tokenizer = HGTokenizer.from_file('bpeTokenizer.json')
104-
except :
104+
except Exception:
105105
tokenizer = huggingface_tokenizer.Tokenizer(
106106
huggingface_tokenizer.models.BPE())
107107
if (bpe_trainfile is None):

tests/test_tokenizer/test_tokenizer.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -71,7 +71,7 @@ def test_TokenizerSpacy():
7171
some, there are still 4 packs left, 25 each, how many are sold?"]
7272
ans = [
7373
'The', 'stationery', 'store', 'has', '600', 'exercise',
74-
'books', 'and', 'after', 'selling', ' ', 'some', 'there', 'are', 'still',
74+
'books', 'and', 'after', 'selling', ' ', 'some', 'there', 'are', 'still',
7575
'4', 'packs', 'left', '25', 'each', 'how', 'many', 'are', 'sold'
7676
]
7777
tokenizer = get_tokenizer("pure_text",
@@ -84,7 +84,7 @@ def test_TokenizerSpacy():
8484
def test_TokenizerBPE():
8585
items = ['The stationery store has $600$ exercise books, and after selling some,\
8686
there are still $4$ packs left, $25$ each, how many are sold?']
87-
ans = ['h', '600', ' ', '^', '4', '^', ' ', '25', ' ']
87+
ans = ['h', '600', ' ', '4', ' ', '25', ' ']
8888
data_path = path_append(abs_current_dir(__file__),
8989
"../../static/test_data/standard_luna_data.json", to_str=True)
9090
tokenizer = get_tokenizer("pure_text", text_params={"tokenizer": 'bpe', "stopwords": set(",?"),

0 commit comments

Comments (0)