Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: private aa mutation groups #1481

Merged
merged 8 commits into from
Jun 6, 2024
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ indexing_slicing = "allow"
integer_division = "allow"
iter_nth_zero = "allow"
large_digit_groups = "allow"
len_without_is_empty = "allow"
len_zero = "allow"
let_underscore_must_use = "allow"
manual_string_new = "allow"
Expand Down
1 change: 1 addition & 0 deletions packages/nextclade/src/alphabet/aa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ impl Display for Aa {

impl Letter<Aa> for Aa {
const GAP: Aa = Aa::Gap;
const UNKNOWN: Aa = Aa::X;

#[inline]
fn is_gap(&self) -> bool {
Expand Down
1 change: 1 addition & 0 deletions packages/nextclade/src/alphabet/letter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ pub trait ScoreMatrixLookup<T> {
/// Generic representation of a character defining nucleotide or amino acid
pub trait Letter<L>: Copy + Display + Eq + Ord + ScoreMatrixLookup<L> {
const GAP: L;
const UNKNOWN: L;

fn is_gap(&self) -> bool;

Expand Down
1 change: 1 addition & 0 deletions packages/nextclade/src/alphabet/nuc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ impl Display for Nuc {

impl Letter<Nuc> for Nuc {
const GAP: Nuc = Nuc::Gap;
const UNKNOWN: Nuc = Nuc::N;

#[inline]
fn is_gap(&self) -> bool {
Expand Down
52 changes: 52 additions & 0 deletions packages/nextclade/src/analyze/aa_alignment.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
use crate::alphabet::aa::Aa;
use crate::analyze::aa_sub_min::AaSubMin;
use crate::coord::position::{AaRefPosition, PositionLike};
use crate::gene::cds::Cds;
use crate::translate::translate_genes::CdsTranslation;

/// Represents a pair of aligned amino acid sequences (resulting from pairwise alignment),
/// stored together with a reference to a `Cds`.
///
/// The struct only borrows its inputs; it owns no sequence data itself.
pub struct AaAlignment<'c, 'r, 'q> {
/// Borrowed CDS, returned by `cds()`
cds: &'c Cds,
/// Borrowed reference translation; `ref_at()` and `len()` read its `seq`
ref_tr: &'r CdsTranslation,
/// Borrowed query translation; `qry_at()` reads its `seq`, `is_sequenced()` reads its `alignment_ranges`
qry_tr: &'q CdsTranslation,
}

impl<'c, 'r, 'q> AaAlignment<'c, 'r, 'q> {
  /// Builds an alignment view over a reference and a query translation.
  ///
  /// # Panics
  ///
  /// Panics if the two translated sequences differ in length.
  pub fn new(cds: &'c Cds, ref_tr: &'r CdsTranslation, qry_tr: &'q CdsTranslation) -> Self {
    assert_eq!(ref_tr.seq.len(), qry_tr.seq.len());
    Self { cds, ref_tr, qry_tr }
  }

  /// Returns the CDS stored in this alignment.
  pub const fn cds(&self) -> &Cds {
    self.cds
  }

  /// Length of the reference translation (`new` asserts the query has the same length).
  pub fn len(&self) -> usize {
    self.ref_tr.seq.len()
  }

  /// Reference amino acid at `pos`.
  ///
  /// # Panics
  ///
  /// Panics if `pos` is outside of the reference sequence.
  pub fn ref_at(&self, pos: AaRefPosition) -> Aa {
    let index = pos.as_usize();
    self.ref_tr.seq[index]
  }

  /// Query amino acid at `pos`.
  ///
  /// # Panics
  ///
  /// Panics if `pos` is outside of the query sequence.
  pub fn qry_at(&self, pos: AaRefPosition) -> Aa {
    let index = pos.as_usize();
    self.qry_tr.seq[index]
  }

  /// Pairs the reference and query amino acids found at `pos` into an `AaSubMin`.
  ///
  /// No comparison is made here: the result is produced even when both letters are equal.
  pub fn mut_at(&self, pos: AaRefPosition) -> AaSubMin {
    let ref_aa = self.ref_at(pos);
    let qry_aa = self.qry_at(pos);
    AaSubMin { ref_aa, pos, qry_aa }
  }

  /// Tells whether `pos` is non-negative and falls inside at least one of the
  /// alignment ranges of the query translation.
  pub fn is_sequenced(&self, pos: AaRefPosition) -> bool {
    if pos >= 0 {
      let ranges = &self.qry_tr.alignment_ranges;
      ranges.iter().any(|range| range.contains(pos))
    } else {
      false
    }
  }
}
107 changes: 107 additions & 0 deletions packages/nextclade/src/analyze/aa_change_with_context.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
use crate::alphabet::aa::Aa;
use crate::alphabet::letter::{serde_deserialize_seq, serde_serialize_seq};
use crate::alphabet::nuc::Nuc;
use crate::analyze::aa_sub_min::AaSubMin;
use crate::analyze::abstract_mutation::{AbstractMutation, MutParams, Pos, QryLetter, RefLetter};
use crate::analyze::nuc_alignment::NucAlignment;
use crate::coord::coord_map_cds_to_global::cds_codon_pos_to_ref_range;
use crate::coord::position::{AaRefPosition, NucRefGlobalPosition};
use crate::coord::range::NucRefGlobalRange;
use crate::gene::cds::Cds;
use crate::gene::gene::GeneStrand;
use crate::translate::complement::reverse_complement_in_place;
use itertools::Itertools;
use serde::{Deserialize, Serialize};

/// An amino acid change at one position of a CDS, together with the reference and
/// query nucleotides read from the ranges that position maps to.
///
/// Field names are serialized in camelCase.
#[derive(Clone, Debug, Default, Serialize, Deserialize, schemars::JsonSchema)]
#[serde(rename_all = "camelCase")]
pub struct AaChangeWithContext {
/// Name of the CDS (`new` copies it from `Cds::name`)
pub cds_name: String,
/// Position of the change, in amino acid reference coordinates
pub pos: AaRefPosition,
/// Amino acid in the reference at `pos`
pub ref_aa: Aa,
/// Amino acid in the query at `pos`
pub qry_aa: Aa,
/// `begin` of the first entry of `nuc_ranges`
pub nuc_pos: NucRefGlobalPosition,

/// Reference nucleotides read from `nuc_ranges` and concatenated in order;
/// ranges on the reverse strand are reverse-complemented. Serialized as a string.
#[schemars(with = "String")]
#[serde(serialize_with = "serde_serialize_seq")]
#[serde(deserialize_with = "serde_deserialize_seq")]
pub ref_triplet: Vec<Nuc>,

/// Query nucleotides read from `nuc_ranges` and concatenated in order;
/// ranges on the reverse strand are reverse-complemented. Serialized as a string.
#[schemars(with = "String")]
#[serde(serialize_with = "serde_serialize_seq")]
#[serde(deserialize_with = "serde_deserialize_seq")]
pub qry_triplet: Vec<Nuc>,
/// Global reference nucleotide ranges returned by `cds_codon_pos_to_ref_range` for `pos`
pub nuc_ranges: Vec<NucRefGlobalRange>,
}

impl Pos<AaRefPosition> for AaChangeWithContext {
  /// Position of the change, in amino acid reference coordinates.
  fn pos(&self) -> AaRefPosition {
    let Self { pos, .. } = self;
    *pos
  }
}

impl QryLetter<Aa> for AaChangeWithContext {
  /// Amino acid stored for the query side of the change.
  fn qry_letter(&self) -> Aa {
    let Self { qry_aa, .. } = self;
    *qry_aa
  }
}

impl RefLetter<Aa> for AaChangeWithContext {
  /// Amino acid stored for the reference side of the change.
  fn ref_letter(&self) -> Aa {
    let Self { ref_aa, .. } = self;
    *ref_aa
  }
}

impl AbstractMutation<AaRefPosition, Aa> for AaChangeWithContext {
  /// Returns a copy of `self` with the position and both letters taken from `params`.
  ///
  /// All remaining fields (`cds_name`, `nuc_pos`, `ref_triplet`, `qry_triplet`,
  /// `nuc_ranges`) are cloned from `self` unchanged; the nucleotide context is
  /// NOT recomputed for the new position.
  fn clone_with(&self, params: MutParams<AaRefPosition, Aa>) -> Self {
    Self {
      pos: params.pos,
      ref_aa: params.ref_letter,
      qry_aa: params.qry_letter,
      // The struct update below already supplies a cloned `cds_name`, so it is
      // not cloned separately (that would allocate the string twice).
      ..self.clone()
    }
  }
}

impl AaChangeWithContext {
  /// Builds the change described by `sub` and attaches its nucleotide context.
  ///
  /// The position of `sub` is mapped to global reference ranges with
  /// `cds_codon_pos_to_ref_range`. For each range, in order, the reference and
  /// query nucleotides are read from `aln`; nucleotides of ranges on the reverse
  /// strand are reverse-complemented before being appended to the triplets.
  ///
  /// # Panics
  ///
  /// Panics if the mapping yields no ranges, because `nuc_pos` is taken from the
  /// first range.
  pub fn new(cds: &Cds, sub: &AaSubMin, aln: &NucAlignment) -> Self {
    let AaSubMin { pos, ref_aa, qry_aa } = *sub;
    let stranded_ranges = cds_codon_pos_to_ref_range(cds, pos);

    let mut ref_triplet = Vec::new();
    let mut qry_triplet = Vec::new();
    for (range, strand) in &stranded_ranges {
      let mut ref_nucs = aln.ref_range(range).to_vec();
      let mut qry_nucs = aln.qry_range(range).to_vec();
      if *strand == GeneStrand::Reverse {
        reverse_complement_in_place(&mut ref_nucs);
        reverse_complement_in_place(&mut qry_nucs);
      }
      ref_triplet.extend(ref_nucs);
      qry_triplet.extend(qry_nucs);
    }

    // Strand information is only needed while reading nucleotides; keep the bare ranges.
    let nuc_ranges = stranded_ranges.into_iter().map(|(range, _)| range).collect_vec();
    let nuc_pos = nuc_ranges[0].begin;

    Self {
      cds_name: cds.name.clone(),
      pos,
      ref_aa,
      qry_aa,
      nuc_pos,
      ref_triplet,
      qry_triplet,
      nuc_ranges,
    }
  }
}
Loading
Loading