diff --git a/packages/nextclade/src/analyze/find_aa_motifs_changes.rs b/packages/nextclade/src/analyze/find_aa_motifs_changes.rs index 9f4b5b6e6..dfb93d501 100644 --- a/packages/nextclade/src/analyze/find_aa_motifs_changes.rs +++ b/packages/nextclade/src/analyze/find_aa_motifs_changes.rs @@ -3,12 +3,14 @@ use crate::analyze::find_aa_motifs::{AaMotif, AaMotifWithoutSeq}; use crate::coord::position::AaRefPosition; use crate::coord::range::{intersect, Range}; use crate::translate::translate_genes::Translation; -use crate::utils::collections::{cloned_into, zip_map_hashmap}; +use crate::utils::collections::cloned_into; +use crate::utils::zip_map::zip_by_key; use eyre::Report; use itertools::{Either, Itertools, Zip}; use serde::{Deserialize, Serialize}; use std::collections::{BTreeMap, HashSet}; use std::fmt::Debug; +use std::ops::Deref; pub type AaMotifsMap = BTreeMap>; pub type AaMotifsChangesMap = BTreeMap; @@ -40,21 +42,14 @@ pub fn find_aa_motifs_changes( ref_translation: &Translation, qry_translation: &Translation, ) -> Result { - let query_motif_keys = aa_motifs_qry.keys().collect_vec(); - - // Exclude ref motifs missing in query - let aa_motifs_ref: AaMotifsMap = aa_motifs_ref - .iter() - .filter(|(name, _)| query_motif_keys.contains(name)) - .map(|(key, val)| (key.clone(), val.clone())) - .collect(); - - zip_map_hashmap(&aa_motifs_ref, aa_motifs_qry, |name, motifs_ref, motifs_qry| { - let changes = find_aa_motifs_changes_one(motifs_ref, motifs_qry, ref_translation, qry_translation)?; - - Ok((name.clone(), changes)) - }) - .collect() + zip_by_key(aa_motifs_ref, aa_motifs_qry) + .map(|(name, motifs_ref, motifs_qry)| { + let motifs_ref = motifs_ref.map(Deref::deref).unwrap_or_default(); + let motifs_qry = motifs_qry.map(Deref::deref).unwrap_or_default(); + let changes = find_aa_motifs_changes_one(motifs_ref, motifs_qry, ref_translation, qry_translation)?; + Ok((name.clone(), changes)) + }) + .collect() } fn find_aa_motifs_changes_one( diff --git a/packages/nextclade/src/utils/map.rs b/packages/nextclade/src/utils/map.rs new file mode 100644 index 000000000..ec45f57a9 --- /dev/null +++ b/packages/nextclade/src/utils/map.rs @@ -0,0 +1,63 @@ +use std::collections::{BTreeMap, HashMap}; +use std::hash::Hash; + +#[cfg(feature = "indexmap")] +use indexmap::IndexMap; + +/// Generic interface for maps: HashMap, BTreeMap, IndexMap +pub trait Map { + fn keys<'a>(&'a self) -> impl Iterator + where + K: 'a; + + fn get(&self, key: &K) -> Option<&V>; +} + +impl Map for BTreeMap +where + K: Ord, +{ + fn keys<'a>(&'a self) -> impl Iterator + where + K: 'a, + { + self.keys() + } + + fn get(&self, key: &K) -> Option<&V> { + self.get(key) + } +} + +impl Map for HashMap +where + K: Eq + Hash, +{ + fn keys<'a>(&'a self) -> impl Iterator + where + K: 'a, + { + self.keys() + } + + fn get(&self, key: &K) -> Option<&V> { + self.get(key) + } +} + +#[cfg(feature = "indexmap")] +impl Map for IndexMap +where + K: Eq + Hash, +{ + fn keys<'a>(&'a self) -> impl Iterator + where + K: 'a, + { + self.keys() + } + + fn get(&self, key: &K) -> Option<&V> { + IndexMap::get(self, key) + } +} diff --git a/packages/nextclade/src/utils/mod.rs b/packages/nextclade/src/utils/mod.rs index 6e8c4909e..1b6f01f18 100644 --- a/packages/nextclade/src/utils/mod.rs +++ b/packages/nextclade/src/utils/mod.rs @@ -7,8 +7,10 @@ pub mod fs; pub mod getenv; pub mod global_init; pub mod info; +pub mod map; pub mod num; pub mod option; pub mod string; pub mod vec2d; pub mod wraparound; +pub mod zip_map; diff --git a/packages/nextclade/src/utils/zip_map.rs b/packages/nextclade/src/utils/zip_map.rs new file mode 100644 index 000000000..126bca188 --- /dev/null +++ b/packages/nextclade/src/utils/zip_map.rs @@ -0,0 +1,148 @@ +use crate::utils::map::Map; +use itertools::{chain, Itertools}; +use std::fmt::Debug; +use std::hash::Hash; +use std::marker::PhantomData; + +/// Iterate a pair of maps synchronized by keys +pub fn zip_by_key<'a, M1, M2, K, V1, V2>(map1: &'a M1, map2: &'a M2) -> ZipByKeyIter<'a, K, V1, V2, M1, M2> +where + K: Ord + Debug + Hash + 'a, + V1: 'a, + V2: 'a, + M1: Map, + M2: Map, +{ + ZipByKeyIter::new(map1, map2) +} + +/// Iterator for a pair of maps, synchronized by keys +pub struct ZipByKeyIter<'a, K, V1, V2, M1: Map, M2: Map> { + keys: Vec<&'a K>, + map1: &'a M1, + map2: &'a M2, + index: usize, + _phantom: PhantomData<(V1, V2)>, +} + +impl<'a, K, V1, V2, M1, M2> ZipByKeyIter<'a, K, V1, V2, M1, M2> +where + K: Ord + Debug + Hash + 'a, + V1: 'a, + V2: 'a, + M1: Map, + M2: Map, +{ + pub fn new(map1: &'a M1, map2: &'a M2) -> Self { + let keys = chain!(map1.keys(), map2.keys()).unique().sorted().collect_vec(); + ZipByKeyIter { + keys, + map1, + map2, + index: 0, + _phantom: PhantomData, + } + } +} + +impl<'a, K, V1, V2, M1, M2> Iterator for ZipByKeyIter<'a, K, V1, V2, M1, M2> +where + K: Ord + 'a, + V1: 'a, + V2: 'a, + M1: Map, + M2: Map, +{ + type Item = (&'a K, Option<&'a V1>, Option<&'a V2>); + + fn next(&mut self) -> Option { + if self.index >= self.keys.len() { + return None; + } + let key = self.keys[self.index]; + let value1 = self.map1.get(key); + let value2 = self.map2.get(key); + self.index += 1; + Some((key, value1, value2)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use maplit::{btreemap, hashmap}; + use pretty_assertions::assert_eq; + + #[cfg(feature = "indexmap")] + use indexmap::indexmap; + + #[test] + fn test_zip_by_key_hashmap() { + let map1 = hashmap! { + 'a' => "alpha", + 'b' => "bravo", + 'c' => "charlie", + }; + + let map2 = hashmap! { + 'b' => "beans", + 'c' => "carrots", + 'd' => "dill", + }; + + let mut iter = zip_by_key(&map1, &map2); + + assert_eq!(iter.next(), Some((&'a', Some(&"alpha"), None))); + assert_eq!(iter.next(), Some((&'b', Some(&"bravo"), Some(&"beans")))); + assert_eq!(iter.next(), Some((&'c', Some(&"charlie"), Some(&"carrots")))); + assert_eq!(iter.next(), Some((&'d', None, Some(&"dill")))); + assert_eq!(iter.next(), None); + } + + #[test] + fn test_zip_by_key_btreemap() { + let map1 = btreemap! { + 'a' => "alpha", + 'b' => "bravo", + 'c' => "charlie", + }; + + let map2 = btreemap! { + 'b' => "beans", + 'c' => "carrots", + 'd' => "dill", + }; + + let mut iter = zip_by_key(&map1, &map2); + + assert_eq!(iter.next(), Some((&'a', Some(&"alpha"), None))); + assert_eq!(iter.next(), Some((&'b', Some(&"bravo"), Some(&"beans")))); + assert_eq!(iter.next(), Some((&'c', Some(&"charlie"), Some(&"carrots")))); + assert_eq!(iter.next(), Some((&'d', None, Some(&"dill")))); + assert_eq!(iter.next(), None); + } + + #[cfg(feature = "indexmap")] + #[test] + fn test_zip_by_key_indexmap() { + let map1 = indexmap! { + 'a' => "alpha", + 'b' => "bravo", + 'c' => "charlie", + }; + + let map2 = indexmap! { + 'b' => "beans", + 'c' => "carrots", + 'd' => "dill", + }; + + let mut iter = zip_by_key(&map1, &map2); + + assert_eq!(iter.next(), Some((&'a', Some(&"alpha"), None))); + assert_eq!(iter.next(), Some((&'b', Some(&"bravo"), Some(&"beans")))); + assert_eq!(iter.next(), Some((&'c', Some(&"charlie"), Some(&"carrots")))); + assert_eq!(iter.next(), Some((&'d', None, Some(&"dill")))); + assert_eq!(iter.next(), None); + } +}