Skip to content

Commit

Permalink
Merge pull request #1481 from nextstrain/refactor/private-aa-mutation…
Browse files Browse the repository at this point in the history
…-groups
  • Loading branch information
ivan-aksamentov authored Jun 6, 2024
2 parents 2121a5b + 44d3f0d commit ba26a68
Show file tree
Hide file tree
Showing 18 changed files with 655 additions and 442 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ indexing_slicing = "allow"
integer_division = "allow"
iter_nth_zero = "allow"
large_digit_groups = "allow"
len_without_is_empty = "allow"
len_zero = "allow"
let_underscore_must_use = "allow"
manual_string_new = "allow"
Expand Down
1 change: 1 addition & 0 deletions packages/nextclade/src/alphabet/aa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ impl Display for Aa {

impl Letter<Aa> for Aa {
const GAP: Aa = Aa::Gap;
const UNKNOWN: Aa = Aa::X;

#[inline]
fn is_gap(&self) -> bool {
Expand Down
1 change: 1 addition & 0 deletions packages/nextclade/src/alphabet/letter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ pub trait ScoreMatrixLookup<T> {
/// Generic representation of a character defining nucleotide or amino acid
pub trait Letter<L>: Copy + Display + Eq + Ord + ScoreMatrixLookup<L> {
const GAP: L;
const UNKNOWN: L;

fn is_gap(&self) -> bool;

Expand Down
1 change: 1 addition & 0 deletions packages/nextclade/src/alphabet/nuc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ impl Display for Nuc {

impl Letter<Nuc> for Nuc {
const GAP: Nuc = Nuc::Gap;
const UNKNOWN: Nuc = Nuc::N;

#[inline]
fn is_gap(&self) -> bool {
Expand Down
52 changes: 52 additions & 0 deletions packages/nextclade/src/analyze/aa_alignment.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
use crate::alphabet::aa::Aa;
use crate::analyze::aa_sub_min::AaSubMin;
use crate::coord::position::{AaRefPosition, PositionLike};
use crate::gene::cds::Cds;
use crate::translate::translate_genes::CdsTranslation;

/// Represents a pair of aligned amino acid sequences (resulting from pairwise alignment)
/// together with the CDS they were produced for.
///
/// The struct only borrows its inputs. `AaAlignment::new` asserts that both translations
/// have sequences of equal length.
pub struct AaAlignment<'c, 'r, 'q> {
/// CDS handed to `new`; exposed through `cds()`.
cds: &'c Cds,
/// Translation whose `seq` is read by `ref_at()` and `len()`.
ref_tr: &'r CdsTranslation,
/// Translation whose `seq` is read by `qry_at()` and whose `alignment_ranges` are
/// consulted by `is_sequenced()`.
qry_tr: &'q CdsTranslation,
}

impl<'c, 'r, 'q> AaAlignment<'c, 'r, 'q> {
  /// Bundles a CDS with its reference and query translations.
  ///
  /// # Panics
  ///
  /// Panics when the two translated sequences do not have the same length.
  pub fn new(cds: &'c Cds, ref_tr: &'r CdsTranslation, qry_tr: &'q CdsTranslation) -> Self {
    let ref_len = ref_tr.seq.len();
    let qry_len = qry_tr.seq.len();
    assert_eq!(ref_len, qry_len);
    Self { cds, ref_tr, qry_tr }
  }

  /// Returns the CDS this alignment was built with.
  pub const fn cds(&self) -> &Cds {
    self.cds
  }

  /// Number of letters in the reference translation. `new` asserts that the query
  /// translation has the same length.
  pub fn len(&self) -> usize {
    let Self { ref_tr, .. } = self;
    ref_tr.seq.len()
  }

  /// Letter of the reference translation at `pos`.
  ///
  /// # Panics
  ///
  /// Panics if `pos.as_usize()` is not a valid index into the reference sequence.
  pub fn ref_at(&self, pos: AaRefPosition) -> Aa {
    let index = pos.as_usize();
    self.ref_tr.seq[index]
  }

  /// Letter of the query translation at `pos`.
  ///
  /// # Panics
  ///
  /// Panics if `pos.as_usize()` is not a valid index into the query sequence.
  pub fn qry_at(&self, pos: AaRefPosition) -> Aa {
    let index = pos.as_usize();
    self.qry_tr.seq[index]
  }

  /// Builds an `AaSubMin` from the reference and query letters found at `pos`.
  ///
  /// No check is made that the two letters differ. Panics under the same conditions as
  /// `ref_at` and `qry_at`.
  pub fn mut_at(&self, pos: AaRefPosition) -> AaSubMin {
    let ref_aa = self.ref_at(pos);
    let qry_aa = self.qry_at(pos);
    AaSubMin { ref_aa, pos, qry_aa }
  }

  /// Returns `true` when `pos` is non-negative and lies inside at least one of the query
  /// translation's `alignment_ranges`.
  pub fn is_sequenced(&self, pos: AaRefPosition) -> bool {
    let is_non_negative = pos >= 0;
    let covered_ranges = &self.qry_tr.alignment_ranges;
    is_non_negative && covered_ranges.iter().any(|range| range.contains(pos))
  }
}
107 changes: 107 additions & 0 deletions packages/nextclade/src/analyze/aa_change_with_context.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
use crate::alphabet::aa::Aa;
use crate::alphabet::letter::{serde_deserialize_seq, serde_serialize_seq};
use crate::alphabet::nuc::Nuc;
use crate::analyze::aa_sub_min::AaSubMin;
use crate::analyze::abstract_mutation::{AbstractMutation, MutParams, Pos, QryLetter, RefLetter};
use crate::analyze::nuc_alignment::NucAlignment;
use crate::coord::coord_map_cds_to_global::cds_codon_pos_to_ref_range;
use crate::coord::position::{AaRefPosition, NucRefGlobalPosition};
use crate::coord::range::NucRefGlobalRange;
use crate::gene::cds::Cds;
use crate::gene::gene::GeneStrand;
use crate::translate::complement::reverse_complement_in_place;
use itertools::Itertools;
use serde::{Deserialize, Serialize};

/// An amino acid change at one codon of a CDS, together with the nucleotides of that codon
/// in the reference and in the query.
///
/// Values are normally produced by `AaChangeWithContext::new`. Field names are serialized in
/// camelCase.
#[derive(Clone, Debug, Default, Serialize, Deserialize, schemars::JsonSchema)]
#[serde(rename_all = "camelCase")]
pub struct AaChangeWithContext {
/// Name of the CDS (`new` copies it from `Cds::name`).
pub cds_name: String,
/// Codon position of the change within the CDS.
pub pos: AaRefPosition,
/// Amino acid in the reference at `pos`.
pub ref_aa: Aa,
/// Amino acid in the query at `pos`.
pub qry_aa: Aa,
/// `begin` of the first entry of `nuc_ranges` (as set by `new`).
pub nuc_pos: NucRefGlobalPosition,

/// Reference nucleotides covering the codon, concatenated over `nuc_ranges`; `new`
/// reverse-complements the pieces that come from reverse-strand ranges.
/// (De)serialized through `serde_serialize_seq` / `serde_deserialize_seq`; the JSON schema
/// declares it as a string.
#[schemars(with = "String")]
#[serde(serialize_with = "serde_serialize_seq")]
#[serde(deserialize_with = "serde_deserialize_seq")]
pub ref_triplet: Vec<Nuc>,

/// Query nucleotides covering the codon, built and (de)serialized the same way as
/// `ref_triplet`.
#[schemars(with = "String")]
#[serde(serialize_with = "serde_serialize_seq")]
#[serde(deserialize_with = "serde_deserialize_seq")]
pub qry_triplet: Vec<Nuc>,
/// Ranges returned by `cds_codon_pos_to_ref_range` for `pos`, with the strand dropped.
pub nuc_ranges: Vec<NucRefGlobalRange>,
}

/// Exposes the codon position of the change through the generic `Pos` trait.
impl Pos<AaRefPosition> for AaChangeWithContext {
/// Returns a copy of the `pos` field.
fn pos(&self) -> AaRefPosition {
self.pos
}
}

/// Exposes the query amino acid through the generic `QryLetter` trait.
impl QryLetter<Aa> for AaChangeWithContext {
/// Returns a copy of the `qry_aa` field.
fn qry_letter(&self) -> Aa {
self.qry_aa
}
}

/// Exposes the reference amino acid through the generic `RefLetter` trait.
impl RefLetter<Aa> for AaChangeWithContext {
/// Returns a copy of the `ref_aa` field.
fn ref_letter(&self) -> Aa {
self.ref_aa
}
}

impl AbstractMutation<AaRefPosition, Aa> for AaChangeWithContext {
  /// Returns a copy of `self` whose `pos`, `ref_aa` and `qry_aa` are replaced by the values
  /// in `params`.
  ///
  /// All other fields (`cds_name`, `nuc_pos`, `ref_triplet`, `qry_triplet`, `nuc_ranges`)
  /// are copied from `self` unchanged. The nucleotide context is NOT recomputed, even when
  /// `params.pos` differs from `self.pos`.
  fn clone_with(&self, params: MutParams<AaRefPosition, Aa>) -> Self {
    Self {
      pos: params.pos,
      ref_aa: params.ref_letter,
      qry_aa: params.qry_letter,
      // `cds_name` is deliberately not listed: the functional update below already supplies
      // it from the clone, so cloning it separately would allocate the string twice.
      ..self.clone()
    }
  }
}

impl AaChangeWithContext {
pub fn new(cds: &Cds, sub: &AaSubMin, aln: &NucAlignment) -> Self {
let AaSubMin { pos, ref_aa, qry_aa } = *sub;
let nuc_ranges = cds_codon_pos_to_ref_range(cds, pos);

let ref_triplet = nuc_ranges
.iter()
.flat_map(|(range, strand)| {
let mut nucs = aln.ref_range(range).to_vec();
if strand == &GeneStrand::Reverse {
reverse_complement_in_place(&mut nucs);
}
nucs
})
.collect_vec();

let qry_triplet = nuc_ranges
.iter()
.flat_map(|(range, strand)| {
let mut nucs = aln.qry_range(range).to_vec();
if strand == &GeneStrand::Reverse {
reverse_complement_in_place(&mut nucs);
}
nucs
})
.collect_vec();

let nuc_ranges = nuc_ranges.into_iter().map(|(range, _)| range).collect_vec();

Self {
cds_name: cds.name.clone(),
pos,
ref_aa,
qry_aa,
nuc_pos: nuc_ranges[0].begin,
nuc_ranges,
ref_triplet,
qry_triplet,
}
}
}
Loading

0 comments on commit ba26a68

Please sign in to comment.