@@ -71,7 +71,7 @@ def test_TokenizerSpacy():
71
71
some, there are still 4 packs left, 25 each, how many are sold?" ]
72
72
ans = [
73
73
'The' , 'stationery' , 'store' , 'has' , '600' , 'exercise' ,
74
- 'books' , 'and' , 'after' , 'selling' , ' ' , 'some' , 'there' , 'are' , 'still' ,
74
+ 'books' , 'and' , 'after' , 'selling' , ' ' , 'some' , 'there' , 'are' , 'still' ,
75
75
'4' , 'packs' , 'left' , '25' , 'each' , 'how' , 'many' , 'are' , 'sold'
76
76
]
77
77
tokenizer = get_tokenizer ("pure_text" ,
@@ -84,7 +84,7 @@ def test_TokenizerSpacy():
84
84
def test_TokenizerBPE ():
85
85
items = ['The stationery store has $600$ exercise books, and after selling some,\
86
86
there are still $4$ packs left, $25$ each, how many are sold?' ]
87
- ans = ['h' , '600' , ' ' , '^' , '4' , '^ ' , ' ' , '25' , ' ' ]
87
+ ans = ['h' , '600' , ' ' , '4 ' , ' ' , '25' , ' ' ]
88
88
data_path = path_append (abs_current_dir (__file__ ),
89
89
"../../static/test_data/standard_luna_data.json" , to_str = True )
90
90
tokenizer = get_tokenizer ("pure_text" , text_params = {"tokenizer" : 'bpe' , "stopwords" : set (",?" ),
0 commit comments