-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathunittests.py
58 lines (38 loc) · 1.58 KB
/
unittests.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import unittest
class TokenizerTests(unittest.TestCase):
    """Tests for ``Tokenizer.tokenize`` from the ``talkdesk`` module."""

    def setUp(self):
        """Build a fresh ``Tokenizer`` for every test.

        The import is kept local (as in the individual tests before) so that
        a missing ``talkdesk`` module surfaces as an error on each test
        instead of preventing this whole test module from being imported.
        """
        from talkdesk import Tokenizer

        self.tokenizer = Tokenizer()

    def test_tokenizer(self):
        """Check that the tokenizer actually splits text into tokens.

        ``filter_lemma=False`` is passed, and the expected list keeps every
        word, the ``'s`` suffixes and the full stops as separate tokens.
        """
        text = "aveiro's the best looking city. cities. killing it administrator's"
        expected = [
            'aveiro', "'s", 'the', 'best', 'looking', 'city', '.',
            'cities', '.', 'killing', 'it', 'administrator', "'s",
        ]
        tokenized_text = self.tokenizer.tokenize(text, filter_lemma=False)
        self.assertListEqual(tokenized_text, expected)

    def test_lemma_filter(self):
        """Check that lemmas are being filtered.

        ``tokenize`` is called without ``filter_lemma`` and every token of
        the sample text is expected to be dropped, leaving an empty list.
        """
        text = ' ! 123 1.3€ 1€ myself will in has is'
        tokenized_text = self.tokenizer.tokenize(text)
        # No debug printing: assertListEqual already shows the offending
        # elements when the result is not empty.
        self.assertListEqual(tokenized_text, [])
class InputTester(unittest.TestCase):
    """Checks ``auxfunctions.parse_input`` against sample query strings."""

    def test_multiple_tags(self):
        # The query carries four ``name:value`` tags (content, title, tag1,
        # tag2) and two ``AND`` separators.
        # NOTE(review): the earlier comment asked "do we catch 4 tags?" while
        # the assertion expects 2 parsed entries -- confirm which is intended.
        from auxfunctions import parse_input

        query_string = "content:word1 word2 word3 AND title:some really long title AND tag1:tagg tag2:taggg"
        result = parse_input(query_string)
        self.assertEqual(len(result), 2)

    def test_quoted_search(self):
        # A query wrapped entirely in double quotes: the entry popped from
        # the parse result must hold True at index 2.
        # presumably index 2 is a "quoted phrase" flag -- verify against
        # parse_input.
        from auxfunctions import parse_input

        entries = parse_input('"President Obama"')
        entry = entries.pop()
        self.assertEqual(entry[2], True)
if __name__ == '__main__':
    # Executed as a script: let unittest discover and run the TestCase
    # classes defined in this module.
    unittest.main()