Skip to content

Commit 98c1db8

Browse files
committed
add test for Armenian
1 parent d929c01 commit 98c1db8

File tree

1 file changed

+63
-43
lines changed

1 file changed

+63
-43
lines changed

charabia/src/normalizer/lowercase.rs

Lines changed: 63 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -44,57 +44,77 @@ mod test {
44 44

45 45
fn tokens() -> Vec<Token<'static>> {
46 46
vec![Token {
47-
lemma: Owned("PascalCase".to_string()),
48-
char_end: 10,
49-
byte_end: 10,
50-
script: Script::Latin,
51-
..Default::default()
52-
}]
47+
lemma: Owned("PascalCase".to_string()),
48+
char_end: 10,
49+
byte_end: 10,
50+
script: Script::Latin,
51+
..Default::default()
52+
}, Token {
53+
lemma: Owned("ֆիզիկոսը".to_string()),
54+
char_end: 8,
55+
byte_end: 16,
56+
script: Script::Armenian,
57+
..Default::default()
58+
}]
53 59
}
54 60

55 61
fn normalizer_result() -> Vec<Token<'static>> {
56 62
vec![Token {
57-
lemma: Owned("pascalcase".to_string()),
58-
char_end: 10,
59-
byte_end: 10,
60-
script: Script::Latin,
61-
char_map: Some(vec![
62-
(1, 1),
63-
(1, 1),
64-
(1, 1),
65-
(1, 1),
66-
(1, 1),
67-
(1, 1),
68-
(1, 1),
69-
(1, 1),
70-
(1, 1),
71-
(1, 1),
72-
]),
73-
..Default::default()
74-
}]
63+
lemma: Owned("pascalcase".to_string()),
64+
char_end: 10,
65+
byte_end: 10,
66+
script: Script::Latin,
67+
char_map: Some(vec![
68+
(1, 1),
69+
(1, 1),
70+
(1, 1),
71+
(1, 1),
72+
(1, 1),
73+
(1, 1),
74+
(1, 1),
75+
(1, 1),
76+
(1, 1),
77+
(1, 1),
78+
]),
79+
..Default::default()
80+
}, Token {
81+
lemma: Owned("ֆիզիկոսը".to_string()),
82+
char_end: 8,
83+
byte_end: 16,
84+
script: Script::Armenian,
85+
..Default::default()
86+
}]
75 87
}
76 88

77 89
fn normalized_tokens() -> Vec<Token<'static>> {
78 90
vec![Token {
79-
lemma: Owned("pascalcase".to_string()),
80-
char_end: 10,
81-
byte_end: 10,
82-
script: Script::Latin,
83-
kind: TokenKind::Word,
84-
char_map: Some(vec![
85-
(1, 1),
86-
(1, 1),
87-
(1, 1),
88-
(1, 1),
89-
(1, 1),
90-
(1, 1),
91-
(1, 1),
92-
(1, 1),
93-
(1, 1),
94-
(1, 1),
95-
]),
96-
..Default::default()
97-
}]
91+
lemma: Owned("pascalcase".to_string()),
92+
char_end: 10,
93+
byte_end: 10,
94+
script: Script::Latin,
95+
kind: TokenKind::Word,
96+
char_map: Some(vec![
97+
(1, 1),
98+
(1, 1),
99+
(1, 1),
100+
(1, 1),
101+
(1, 1),
102+
(1, 1),
103+
(1, 1),
104+
(1, 1),
105+
(1, 1),
106+
(1, 1),
107+
]),
108+
..Default::default()
109+
},
110+
Token {
111+
lemma: Owned("ֆիզիկոսը".to_string()),
112+
char_end: 8,
113+
byte_end: 16,
114+
script: Script::Armenian,
115+
kind: TokenKind::Word,
116+
..Default::default()
117+
}]
98 118
}
99 119

100 120
test_normalizer!(LowercaseNormalizer, tokens(), normalizer_result(), normalized_tokens());

0 commit comments

Comments
 (0)