Skip to content

Commit

Permalink
Allow configuring the Checker to convert lowercase to other casings
Browse files Browse the repository at this point in the history
This can be useful for checking text that doesn't follow normal
casing conventions. For example, in prose "Alice" should be correct but
"alice" should not, whereas in source code the conventions for naming
variables tend to enforce lower casing.
  • Loading branch information
the-mikedavis committed Jan 28, 2025
1 parent 6b3d6f6 commit b6c31ea
Showing 1 changed file with 78 additions and 2 deletions.
80 changes: 78 additions & 2 deletions src/checker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,17 @@ macro_rules! flag {
// Spell-checking handle: borrows a dictionary's data and carries per-checker casing options.
pub struct Checker<'a, S: BuildHasher> {
/// Borrowed word list of the dictionary this checker was created from.
pub(crate) words: &'a WordList<S>,
/// Borrowed affix data of the dictionary; its `options.case_handling` is used for the
/// titlecase/uppercase conversions performed while checking.
pub(crate) aff: &'a AffData,

/// When `true`, a word that is not correct as written is checked again in titlecase form.
/// Set with `Checker::check_lower_as_title`; `false` by default.
check_lower_as_title: bool,
/// When `true`, a word that is not correct as written is checked again in uppercase form.
/// Set with `Checker::check_lower_as_upper`; `false` by default.
check_lower_as_upper: bool,
}

impl<S> fmt::Debug for Checker<'_, S>
where
    S: BuildHasher,
{
    /// Writes a compact debug representation of the checker.
    ///
    /// The word list is summarized by its length rather than printed in full and the affix
    /// data is left out, which is why the output is marked as non-exhaustive.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let mut repr = f.debug_struct("Checker");
        repr.field("words", &self.words.len());
        repr.field("check_lower_as_title", &self.check_lower_as_title);
        repr.field("check_lower_as_upper", &self.check_lower_as_upper);
        repr.finish_non_exhaustive()
    }
}
Expand All @@ -43,9 +48,51 @@ impl<'a, S: BuildHasher> Checker<'a, S> {
Self {
words: &dict.words,
aff: &dict.aff_data,
check_lower_as_title: false,
check_lower_as_upper: false,
}
}

/// Configures the `Checker` to also check lowercase words in titlecase form.
///
/// Normally lowercase words are only checked as written: they are not also checked in
/// titlecase or uppercase forms. For example "Alice" is correct in the `en_US` dictionary but
/// not "alice", and "RSVP" is correct but not "rsvp".
///
/// When this option is enabled, a word that is not correct as written is checked again in its
/// titlecase form, so "alice" is accepted because "Alice" is correct. This can be useful for
/// text which doesn't follow normal casing conventions, such as source code where naming
/// conventions tend to enforce lowercase. The option is disabled by default.
///
/// This method consumes the `Checker` and returns the reconfigured `Checker`, so the return
/// value must be used.
///
/// # Example
///
/// ```
/// let aff = std::fs::read_to_string("./vendor/en_US/en_US.aff").unwrap();
/// let dic = std::fs::read_to_string("./vendor/en_US/en_US.dic").unwrap();
/// let dict = spellbook::Dictionary::new(&aff, &dic).unwrap();
///
/// assert!(!dict.check("alice"));
/// assert!(dict.checker().check_lower_as_title(true).check("alice"));
/// ```
#[must_use]
pub fn check_lower_as_title(mut self, check_lower_as_title: bool) -> Self {
    self.check_lower_as_title = check_lower_as_title;
    self
}

/// Configures the `Checker` to also check lowercase words in uppercase form.
///
/// Normally lowercase words are only checked as written, so "RSVP" is correct in the `en_US`
/// dictionary but not "rsvp". When this option is enabled, a word that is not correct as
/// written is checked again in its uppercase form, so "rsvp" is accepted because "RSVP" is
/// correct. The option is disabled by default.
///
/// See also [`Checker::check_lower_as_title`].
///
/// This method consumes the `Checker` and returns the reconfigured `Checker`, so the return
/// value must be used.
///
/// # Example
///
/// ```
/// let aff = std::fs::read_to_string("./vendor/en_US/en_US.aff").unwrap();
/// let dic = std::fs::read_to_string("./vendor/en_US/en_US.dic").unwrap();
/// let dict = spellbook::Dictionary::new(&aff, &dic).unwrap();
///
/// assert!(!dict.check("rsvp"));
/// assert!(dict.checker().check_lower_as_upper(true).check("rsvp"));
/// ```
#[must_use]
pub fn check_lower_as_upper(mut self, check_lower_as_upper: bool) -> Self {
    self.check_lower_as_upper = check_lower_as_upper;
    self
}

/// Checks that the word is valid according to the dictionary.
pub fn check(&self, word: &str) -> bool {
if word.len() > MAX_WORD_LEN {
Expand Down Expand Up @@ -146,9 +193,30 @@ impl<'a, S: BuildHasher> Checker<'a, S> {
}

fn spell_casing(&self, word: &str) -> Option<&FlagSet> {
match classify_casing(word) {
let casing = classify_casing(word);
match casing {
Casing::None | Casing::Camel | Casing::Pascal => {
self.check_word(word, Forceucase::default(), HiddenHomonym::default())
if let Some(flags) =
self.check_word(word, Forceucase::default(), HiddenHomonym::default())
{
return Some(flags);
}

if self.check_lower_as_title && !matches!(casing, Casing::Pascal) {
let title = self.aff.options.case_handling.titlecase(word);
if let Some(flags) = self.spell_casing_title(&title) {
return Some(flags);
}
}

if self.check_lower_as_upper {
let upper = self.aff.options.case_handling.uppercase(word);
if let Some(flags) = self.spell_casing_upper(&upper) {
return Some(flags);
}
}

None
}
Casing::All => self.spell_casing_upper(word),
Casing::Init => self.spell_casing_title(word),
Expand Down Expand Up @@ -2644,4 +2712,12 @@ mod test {
assert!(EN_US.check("123rd"));
assert!(EN_US.check("1234th"));
}

#[test]
fn check_lower_as_other_casings() {
    // With the default configuration lowercase words are only checked as written.
    for word in ["alice", "rsvp"] {
        assert!(!EN_US.check(word));
    }

    // Opting in to titlecase conversion accepts "alice" through "Alice".
    let title_checker = EN_US.checker().check_lower_as_title(true);
    assert!(title_checker.check("alice"));

    // Opting in to uppercase conversion accepts "rsvp" through "RSVP".
    let upper_checker = EN_US.checker().check_lower_as_upper(true);
    assert!(upper_checker.check("rsvp"));
}
}

0 comments on commit b6c31ea

Please sign in to comment.