Skip to content

Commit

Permalink
add test for Armenian
Browse files Browse the repository at this point in the history
  • Loading branch information
NarHakobyan committed Feb 12, 2025
1 parent d929c01 commit 98c1db8
Showing 1 changed file with 63 additions and 43 deletions.
106 changes: 63 additions & 43 deletions charabia/src/normalizer/lowercase.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,57 +44,77 @@ mod test {

// Builds the input tokens handed to `test_normalizer!` as its second argument
// (see the macro invocation at the end of this module).
//
// NOTE(review): this span comes from a rendered diff whose +/- markers were
// lost, so the earlier single-token vector and the later two-token vector
// appear back to back. The first `vec![Token { .. }]` group looks like the
// pre-commit text and the second group like the post-commit text -- confirm
// against the repository before treating this as compilable code.
fn tokens() -> Vec<Token<'static>> {
vec![Token {
lemma: Owned("PascalCase".to_string()),
char_end: 10,
byte_end: 10,
script: Script::Latin,
..Default::default()
}]
// Second rendering of the Latin token, this time followed by an Armenian token.
lemma: Owned("PascalCase".to_string()),
char_end: 10,
byte_end: 10,
script: Script::Latin,
..Default::default()
}, Token {
// 8 characters, 16 bytes: each letter of this lemma takes two bytes in UTF-8.
lemma: Owned("ֆիզիկոսը".to_string()),
char_end: 8,
byte_end: 16,
script: Script::Armenian,
..Default::default()
}]
}

// Builds the tokens handed to `test_normalizer!` as its third argument;
// presumably the expected output of running the normalizer alone -- the macro
// is defined elsewhere, so verify its contract there.
//
// NOTE(review): this span comes from a rendered diff whose +/- markers were
// lost, so the earlier single-token vector and the later two-token vector
// appear back to back -- confirm against the repository.
fn normalizer_result() -> Vec<Token<'static>> {
vec![Token {
lemma: Owned("pascalcase".to_string()),
char_end: 10,
byte_end: 10,
script: Script::Latin,
char_map: Some(vec![
(1, 1),
(1, 1),
(1, 1),
(1, 1),
(1, 1),
(1, 1),
(1, 1),
(1, 1),
(1, 1),
(1, 1),
]),
..Default::default()
}]
// Second rendering of the Latin token, this time followed by an Armenian token.
lemma: Owned("pascalcase".to_string()),
char_end: 10,
byte_end: 10,
script: Script::Latin,
// One (1, 1) pair per character of the 10-character lemma.
char_map: Some(vec![
(1, 1),
(1, 1),
(1, 1),
(1, 1),
(1, 1),
(1, 1),
(1, 1),
(1, 1),
(1, 1),
(1, 1),
]),
..Default::default()
}, Token {
// Same lemma as the Armenian input token, and no `char_map` is set here.
// NOTE(review): the input looks already lowercase, so this case seems to
// cover pass-through only; consider an input with an uppercase letter if
// the goal is to exercise Armenian lowercasing -- confirm with the author.
lemma: Owned("ֆիզիկոսը".to_string()),
char_end: 8,
byte_end: 16,
script: Script::Armenian,
..Default::default()
}]
}

// Builds the tokens handed to `test_normalizer!` as its fourth argument;
// presumably the expected output of a fuller normalization pass -- the macro
// is defined elsewhere, so verify its contract there. Unlike
// `normalizer_result`, every token here also sets `kind: TokenKind::Word`.
//
// NOTE(review): this span comes from a rendered diff whose +/- markers were
// lost, so the earlier single-token vector and the later two-token vector
// appear back to back -- confirm against the repository.
fn normalized_tokens() -> Vec<Token<'static>> {
vec![Token {
lemma: Owned("pascalcase".to_string()),
char_end: 10,
byte_end: 10,
script: Script::Latin,
kind: TokenKind::Word,
char_map: Some(vec![
(1, 1),
(1, 1),
(1, 1),
(1, 1),
(1, 1),
(1, 1),
(1, 1),
(1, 1),
(1, 1),
(1, 1),
]),
..Default::default()
}]
// Second rendering of the Latin token, this time followed by an Armenian token.
lemma: Owned("pascalcase".to_string()),
char_end: 10,
byte_end: 10,
script: Script::Latin,
kind: TokenKind::Word,
// One (1, 1) pair per character of the 10-character lemma.
char_map: Some(vec![
(1, 1),
(1, 1),
(1, 1),
(1, 1),
(1, 1),
(1, 1),
(1, 1),
(1, 1),
(1, 1),
(1, 1),
]),
..Default::default()
},
Token {
// Same lemma as the Armenian input token; no `char_map` is set here.
lemma: Owned("ֆիզիկոսը".to_string()),
char_end: 8,
byte_end: 16,
script: Script::Armenian,
kind: TokenKind::Word,
..Default::default()
}]
}

// Invokes the `test_normalizer!` macro with the normalizer under test and the
// three fixture functions above. The macro is defined outside this view;
// presumably it generates the tests comparing the fixtures -- verify there.
test_normalizer!(LowercaseNormalizer, tokens(), normalizer_result(), normalized_tokens());
Expand Down

0 comments on commit 98c1db8

Please sign in to comment.