-
Notifications
You must be signed in to change notification settings - Fork 60
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix: ensure loss of aa motif detected when it's the only motif
Currently Nextclade fails to detect a lost motif if the motif is the only one in its category. This is due to incorrect iteration of the category keys (names) of motifs. Here I implemented an iterator to correctly visit both the reference and query motif maps: it will visit the category even if it is entirely missing in one or the other map. Previously, the category missing from one map would not be considered, which caused silent failure to detect the disappearance of a motif. This was initially reported for the flu h5 datasets - the loss of `polybasic_cleavage_site` motif would not be detected. I tested by comparing the outputs of smoke tests from master branch and from this branch. The `polybasic_cleavage_site` motif loss is now detected in h5 datasets and there are no other changes.
- Loading branch information
1 parent
32cdeb0
commit 4483423
Showing
4 changed files
with
224 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
use std::collections::{BTreeMap, HashMap}; | ||
use std::hash::Hash; | ||
|
||
#[cfg(feature = "indexmap")] | ||
use indexmap::IndexMap; | ||
|
||
/// Minimal read-only abstraction over key-value maps.
///
/// Implemented in this module for `HashMap`, `BTreeMap` and, when the `indexmap` feature is
/// enabled, `IndexMap`, so that generic code can enumerate keys and look up values without
/// naming the concrete map type.
pub trait Map<K, V> {
  /// Returns an iterator over references to every key stored in the map.
  ///
  /// The iteration order is whatever the implementing map type provides.
  fn keys<'m>(&'m self) -> impl Iterator<Item = &'m K>
  where
    K: 'm;

  /// Returns a reference to the value stored under `key`, or `None` when the key is absent.
  fn get(&self, key: &K) -> Option<&V>;
}
|
||
impl<K, V> Map<K, V> for BTreeMap<K, V> | ||
where | ||
K: Ord, | ||
{ | ||
fn keys<'a>(&'a self) -> impl Iterator<Item = &'a K> | ||
where | ||
K: 'a, | ||
{ | ||
self.keys() | ||
} | ||
|
||
fn get(&self, key: &K) -> Option<&V> { | ||
self.get(key) | ||
} | ||
} | ||
|
||
impl<K, V> Map<K, V> for HashMap<K, V> | ||
where | ||
K: Eq + Hash, | ||
{ | ||
fn keys<'a>(&'a self) -> impl Iterator<Item = &'a K> | ||
where | ||
K: 'a, | ||
{ | ||
self.keys() | ||
} | ||
|
||
fn get(&self, key: &K) -> Option<&V> { | ||
self.get(key) | ||
} | ||
} | ||
|
||
/// `Map` implementation for `IndexMap`, compiled only with the `indexmap` feature.
///
/// The hasher parameter `S` is left generic so that maps built with a non-default
/// `BuildHasher` can be used as well. `IndexMap<K, V>` with the default hasher is
/// still covered, exactly as before.
#[cfg(feature = "indexmap")]
impl<K, V, S> Map<K, V> for IndexMap<K, V, S>
where
  K: Eq + Hash,
  S: std::hash::BuildHasher,
{
  /// Delegates to the inherent `IndexMap::keys`.
  fn keys<'a>(&'a self) -> impl Iterator<Item = &'a K>
  where
    K: 'a,
  {
    IndexMap::keys(self)
  }

  /// Delegates to the inherent `IndexMap::get`.
  fn get(&self, key: &K) -> Option<&V> {
    IndexMap::get(self, key)
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,148 @@ | ||
use crate::utils::map::Map; | ||
use itertools::{chain, Itertools}; | ||
use std::fmt::Debug; | ||
use std::hash::Hash; | ||
use std::marker::PhantomData; | ||
|
||
/// Iterate a pair of maps synchronized by keys | ||
pub fn zip_by_key<'a, M1, M2, K, V1, V2>(map1: &'a M1, map2: &'a M2) -> ZipByKeyIter<'a, K, V1, V2, M1, M2> | ||
where | ||
K: Ord + Debug + Hash + 'a, | ||
V1: 'a, | ||
V2: 'a, | ||
M1: Map<K, V1>, | ||
M2: Map<K, V2>, | ||
{ | ||
ZipByKeyIter::new(map1, map2) | ||
} | ||
|
||
/// Iterator for a pair of maps, synchronized by keys | ||
pub struct ZipByKeyIter<'a, K, V1, V2, M1: Map<K, V1>, M2: Map<K, V2>> { | ||
keys: Vec<&'a K>, | ||
map1: &'a M1, | ||
map2: &'a M2, | ||
index: usize, | ||
_phantom: PhantomData<(V1, V2)>, | ||
} | ||
|
||
impl<'a, K, V1, V2, M1, M2> ZipByKeyIter<'a, K, V1, V2, M1, M2> | ||
where | ||
K: Ord + Debug + Hash + 'a, | ||
V1: 'a, | ||
V2: 'a, | ||
M1: Map<K, V1>, | ||
M2: Map<K, V2>, | ||
{ | ||
pub fn new(map1: &'a M1, map2: &'a M2) -> Self { | ||
let keys = chain!(map1.keys(), map2.keys()).unique().sorted().collect_vec(); | ||
ZipByKeyIter { | ||
keys, | ||
map1, | ||
map2, | ||
index: 0, | ||
_phantom: PhantomData, | ||
} | ||
} | ||
} | ||
|
||
impl<'a, K, V1, V2, M1, M2> Iterator for ZipByKeyIter<'a, K, V1, V2, M1, M2> | ||
where | ||
K: Ord + 'a, | ||
V1: 'a, | ||
V2: 'a, | ||
M1: Map<K, V1>, | ||
M2: Map<K, V2>, | ||
{ | ||
type Item = (&'a K, Option<&'a V1>, Option<&'a V2>); | ||
|
||
fn next(&mut self) -> Option<Self::Item> { | ||
if self.index >= self.keys.len() { | ||
return None; | ||
} | ||
let key = self.keys[self.index]; | ||
let value1 = self.map1.get(key); | ||
let value2 = self.map2.get(key); | ||
self.index += 1; | ||
Some((key, value1, value2)) | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod tests {
  use super::*;
  use maplit::{btreemap, hashmap};
  use pretty_assertions::assert_eq;

  #[cfg(feature = "indexmap")]
  use indexmap::indexmap;

  /// One item yielded by `zip_by_key` for the `char -> &str` fixtures used below.
  type Row = (
    &'static char,
    Option<&'static &'static str>,
    Option<&'static &'static str>,
  );

  /// Expected output shared by the per-map-type tests: keys in ascending order,
  /// with `None` on the side whose map lacks the key.
  const EXPECTED: [Row; 4] = [
    (&'a', Some(&"alpha"), None),
    (&'b', Some(&"bravo"), Some(&"beans")),
    (&'c', Some(&"charlie"), Some(&"carrots")),
    (&'d', None, Some(&"dill")),
  ];

  #[test]
  fn test_zip_by_key_hashmap() {
    let map1 = hashmap! {
      'a' => "alpha",
      'b' => "bravo",
      'c' => "charlie",
    };

    let map2 = hashmap! {
      'b' => "beans",
      'c' => "carrots",
      'd' => "dill",
    };

    let actual: Vec<_> = zip_by_key(&map1, &map2).collect();
    assert_eq!(actual, EXPECTED.to_vec());
  }

  #[test]
  fn test_zip_by_key_btreemap() {
    let map1 = btreemap! {
      'a' => "alpha",
      'b' => "bravo",
      'c' => "charlie",
    };

    let map2 = btreemap! {
      'b' => "beans",
      'c' => "carrots",
      'd' => "dill",
    };

    let actual: Vec<_> = zip_by_key(&map1, &map2).collect();
    assert_eq!(actual, EXPECTED.to_vec());
  }

  #[cfg(feature = "indexmap")]
  #[test]
  fn test_zip_by_key_indexmap() {
    let map1 = indexmap! {
      'a' => "alpha",
      'b' => "bravo",
      'c' => "charlie",
    };

    let map2 = indexmap! {
      'b' => "beans",
      'c' => "carrots",
      'd' => "dill",
    };

    let actual: Vec<_> = zip_by_key(&map1, &map2).collect();
    assert_eq!(actual, EXPECTED.to_vec());
  }

  /// Regression guard: a key must still be visited when the other map is empty,
  /// whichever side the empty map is on.
  #[test]
  fn test_zip_by_key_one_map_empty() {
    let filled = btreemap! { 'a' => "alpha" };
    let empty = std::collections::BTreeMap::<char, &str>::new();

    let only_left: Vec<Row> = vec![(&'a', Some(&"alpha"), None)];
    assert_eq!(zip_by_key(&filled, &empty).collect::<Vec<_>>(), only_left);

    let only_right: Vec<Row> = vec![(&'a', None, Some(&"alpha"))];
    assert_eq!(zip_by_key(&empty, &filled).collect::<Vec<_>>(), only_right);

    assert_eq!(zip_by_key(&empty, &empty).next(), None);
  }
}