Skip to content

Commit

Permalink
Merge pull request #1446 from nextstrain/fix/aa-motifs
Browse files Browse the repository at this point in the history
  • Loading branch information
ivan-aksamentov authored May 9, 2024
2 parents 50db629 + 4483423 commit 2e631d8
Show file tree
Hide file tree
Showing 4 changed files with 224 additions and 16 deletions.
27 changes: 11 additions & 16 deletions packages/nextclade/src/analyze/find_aa_motifs_changes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@ use crate::analyze::find_aa_motifs::{AaMotif, AaMotifWithoutSeq};
use crate::coord::position::AaRefPosition;
use crate::coord::range::{intersect, Range};
use crate::translate::translate_genes::Translation;
use crate::utils::collections::{cloned_into, zip_map_hashmap};
use crate::utils::collections::cloned_into;
use crate::utils::zip_map::zip_by_key;
use eyre::Report;
use itertools::{Either, Itertools, Zip};
use serde::{Deserialize, Serialize};
use std::collections::{BTreeMap, HashSet};
use std::fmt::Debug;
use std::ops::Deref;

pub type AaMotifsMap = BTreeMap<String, Vec<AaMotif>>;
pub type AaMotifsChangesMap = BTreeMap<String, AaMotifChanges>;
Expand Down Expand Up @@ -40,21 +42,14 @@ pub fn find_aa_motifs_changes(
ref_translation: &Translation,
qry_translation: &Translation,
) -> Result<AaMotifsChangesMap, Report> {
let query_motif_keys = aa_motifs_qry.keys().collect_vec();

// Exclude ref motifs missing in query
let aa_motifs_ref: AaMotifsMap = aa_motifs_ref
.iter()
.filter(|(name, _)| query_motif_keys.contains(name))
.map(|(key, val)| (key.clone(), val.clone()))
.collect();

zip_map_hashmap(&aa_motifs_ref, aa_motifs_qry, |name, motifs_ref, motifs_qry| {
let changes = find_aa_motifs_changes_one(motifs_ref, motifs_qry, ref_translation, qry_translation)?;

Ok((name.clone(), changes))
})
.collect()
zip_by_key(aa_motifs_ref, aa_motifs_qry)
.map(|(name, motifs_ref, motifs_qry)| {
let motifs_ref = motifs_ref.map(Deref::deref).unwrap_or_default();
let motifs_qry = motifs_qry.map(Deref::deref).unwrap_or_default();
let changes = find_aa_motifs_changes_one(motifs_ref, motifs_qry, ref_translation, qry_translation)?;
Ok((name.clone(), changes))
})
.collect()
}

fn find_aa_motifs_changes_one(
Expand Down
63 changes: 63 additions & 0 deletions packages/nextclade/src/utils/map.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
use std::collections::{BTreeMap, HashMap};
use std::hash::Hash;

#[cfg(feature = "indexmap")]
use indexmap::IndexMap;

/// Generic interface for maps: HashMap, BTreeMap, IndexMap
pub trait Map<K, V> {
fn keys<'a>(&'a self) -> impl Iterator<Item = &'a K>
where
K: 'a;

fn get(&self, key: &K) -> Option<&V>;
}

impl<K, V> Map<K, V> for BTreeMap<K, V>
where
K: Ord,
{
fn keys<'a>(&'a self) -> impl Iterator<Item = &'a K>
where
K: 'a,
{
self.keys()
}

fn get(&self, key: &K) -> Option<&V> {
self.get(key)
}
}

impl<K, V> Map<K, V> for HashMap<K, V>
where
K: Eq + Hash,
{
fn keys<'a>(&'a self) -> impl Iterator<Item = &'a K>
where
K: 'a,
{
self.keys()
}

fn get(&self, key: &K) -> Option<&V> {
self.get(key)
}
}

#[cfg(feature = "indexmap")]
impl<K, V> Map<K, V> for IndexMap<K, V>
where
K: Eq + Hash,
{
fn keys<'a>(&'a self) -> impl Iterator<Item = &'a K>
where
K: 'a,
{
self.keys()
}

fn get(&self, key: &K) -> Option<&V> {
IndexMap::get(self, key)
}
}
2 changes: 2 additions & 0 deletions packages/nextclade/src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@ pub mod fs;
pub mod getenv;
pub mod global_init;
pub mod info;
pub mod map;
pub mod num;
pub mod option;
pub mod string;
pub mod vec2d;
pub mod wraparound;
pub mod zip_map;
148 changes: 148 additions & 0 deletions packages/nextclade/src/utils/zip_map.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
use crate::utils::map::Map;
use itertools::{chain, Itertools};
use std::fmt::Debug;
use std::hash::Hash;
use std::marker::PhantomData;

/// Iterate a pair of maps synchronized by keys
pub fn zip_by_key<'a, M1, M2, K, V1, V2>(map1: &'a M1, map2: &'a M2) -> ZipByKeyIter<'a, K, V1, V2, M1, M2>
where
K: Ord + Debug + Hash + 'a,
V1: 'a,
V2: 'a,
M1: Map<K, V1>,
M2: Map<K, V2>,
{
ZipByKeyIter::new(map1, map2)
}

/// Iterator for a pair of maps, synchronized by keys
pub struct ZipByKeyIter<'a, K, V1, V2, M1: Map<K, V1>, M2: Map<K, V2>> {
keys: Vec<&'a K>,
map1: &'a M1,
map2: &'a M2,
index: usize,
_phantom: PhantomData<(V1, V2)>,
}

impl<'a, K, V1, V2, M1, M2> ZipByKeyIter<'a, K, V1, V2, M1, M2>
where
K: Ord + Debug + Hash + 'a,
V1: 'a,
V2: 'a,
M1: Map<K, V1>,
M2: Map<K, V2>,
{
pub fn new(map1: &'a M1, map2: &'a M2) -> Self {
let keys = chain!(map1.keys(), map2.keys()).unique().sorted().collect_vec();
ZipByKeyIter {
keys,
map1,
map2,
index: 0,
_phantom: PhantomData,
}
}
}

impl<'a, K, V1, V2, M1, M2> Iterator for ZipByKeyIter<'a, K, V1, V2, M1, M2>
where
K: Ord + 'a,
V1: 'a,
V2: 'a,
M1: Map<K, V1>,
M2: Map<K, V2>,
{
type Item = (&'a K, Option<&'a V1>, Option<&'a V2>);

fn next(&mut self) -> Option<Self::Item> {
if self.index >= self.keys.len() {
return None;
}
let key = self.keys[self.index];
let value1 = self.map1.get(key);
let value2 = self.map2.get(key);
self.index += 1;
Some((key, value1, value2))
}
}

#[cfg(test)]
mod tests {
use super::*;
use maplit::{btreemap, hashmap};
use pretty_assertions::assert_eq;

#[cfg(feature = "indexmap")]
use indexmap::indexmap;

#[test]
fn test_zip_by_key_hashmap() {
let map1 = hashmap! {
'a' => "alpha",
'b' => "bravo",
'c' => "charlie",
};

let map2 = hashmap! {
'b' => "beans",
'c' => "carrots",
'd' => "dill",
};

let mut iter = zip_by_key(&map1, &map2);

assert_eq!(iter.next(), Some((&'a', Some(&"alpha"), None)));
assert_eq!(iter.next(), Some((&'b', Some(&"bravo"), Some(&"beans"))));
assert_eq!(iter.next(), Some((&'c', Some(&"charlie"), Some(&"carrots"))));
assert_eq!(iter.next(), Some((&'d', None, Some(&"dill"))));
assert_eq!(iter.next(), None);
}

#[test]
fn test_zip_by_key_btreemap() {
let map1 = btreemap! {
'a' => "alpha",
'b' => "bravo",
'c' => "charlie",
};

let map2 = btreemap! {
'b' => "beans",
'c' => "carrots",
'd' => "dill",
};

let mut iter = zip_by_key(&map1, &map2);

assert_eq!(iter.next(), Some((&'a', Some(&"alpha"), None)));
assert_eq!(iter.next(), Some((&'b', Some(&"bravo"), Some(&"beans"))));
assert_eq!(iter.next(), Some((&'c', Some(&"charlie"), Some(&"carrots"))));
assert_eq!(iter.next(), Some((&'d', None, Some(&"dill"))));
assert_eq!(iter.next(), None);
}

#[cfg(feature = "indexmap")]
#[test]
fn test_zip_by_key_indexmap() {
let map1 = indexmap! {
'a' => "alpha",
'b' => "bravo",
'c' => "charlie",
};

let map2 = indexmap! {
'b' => "beans",
'c' => "carrots",
'd' => "dill",
};

let mut iter = zip_by_key(&map1, &map2);

assert_eq!(iter.next(), Some((&'a', Some(&"alpha"), None)));
assert_eq!(iter.next(), Some((&'b', Some(&"bravo"), Some(&"beans"))));
assert_eq!(iter.next(), Some((&'c', Some(&"charlie"), Some(&"carrots"))));
assert_eq!(iter.next(), Some((&'d', None, Some(&"dill"))));
assert_eq!(iter.next(), None);
}
}

0 comments on commit 2e631d8

Please sign in to comment.