Skip to content

Commit

Permalink
Merge pull request #887 from nextstrain/feat/reverse-if-seed-fails
Browse files Browse the repository at this point in the history
  • Loading branch information
ivan-aksamentov authored Jun 25, 2022
2 parents 8a3f109 + 18bdc94 commit 78d2156
Show file tree
Hide file tree
Showing 16 changed files with 119 additions and 37 deletions.
2 changes: 2 additions & 0 deletions packages_rs/nextclade-cli/src/cli/nextalign_loop.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ pub fn nextalign_run(run_args: NextalignRunArgs) -> Result<(), Report> {
.wrap_err_with(|| format!("When processing sequence #{index} '{seq_name}'"))
.and_then(|qry_seq| {
nextalign_run_one(
index,
&seq_name,
&qry_seq,
ref_seq,
ref_peptides,
Expand Down
16 changes: 12 additions & 4 deletions packages_rs/nextclade-cli/src/cli/nextalign_ordered_writer.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::cli::nextalign_loop::NextalignRecord;
use eyre::{Report, WrapErr};
use log::warn;
use nextclade::constants::REVERSE_COMPLEMENT_SUFFIX;
use nextclade::io::errors_csv::ErrorsCsvWriter;
use nextclade::io::fasta::{FastaPeptideWriter, FastaRecord, FastaWriter};
use nextclade::io::gene_map::GeneMap;
Expand Down Expand Up @@ -87,24 +88,31 @@ impl<'a> NextalignOrderedWriter<'a> {
translations,
warnings,
missing_genes,
is_reverse_complement,
} = output;

let seq_name = if *is_reverse_complement {
format!("{seq_name}{REVERSE_COMPLEMENT_SUFFIX}")
} else {
seq_name.clone()
};

if let Some(fasta_writer) = &mut self.fasta_writer {
fasta_writer.write(seq_name, &from_nuc_seq(&stripped.qry_seq))?;
fasta_writer.write(&seq_name, &from_nuc_seq(&stripped.qry_seq))?;
}

if let Some(fasta_peptide_writer) = &mut self.fasta_peptide_writer {
for translation in translations {
fasta_peptide_writer.write(seq_name, translation)?;
fasta_peptide_writer.write(&seq_name, translation)?;
}
}

if let Some(insertions_csv_writer) = &mut self.insertions_csv_writer {
insertions_csv_writer.write(seq_name, &stripped.insertions, translations)?;
insertions_csv_writer.write(&seq_name, &stripped.insertions, translations)?;
}

if let Some(errors_csv_writer) = &mut self.errors_csv_writer {
errors_csv_writer.write_aa_errors(seq_name, warnings, missing_genes)?;
errors_csv_writer.write_aa_errors(&seq_name, warnings, missing_genes)?;
}
}
Err(report) => {
Expand Down
1 change: 1 addition & 0 deletions packages_rs/nextclade-cli/src/cli/nextclade_loop.rs
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ pub fn nextclade_run(run_args: NextcladeRunArgs) -> Result<(), Report> {
.wrap_err_with(|| format!("When processing sequence #{index} '{seq_name}'"))
.and_then(|qry_seq| {
nextclade_run_one(
index,
&seq_name,
&qry_seq,
ref_seq,
Expand Down
8 changes: 8 additions & 0 deletions packages_rs/nextclade-cli/src/cli/nextclade_ordered_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use crate::cli::nextclade_loop::NextcladeRecord;
use eyre::{Report, WrapErr};
use itertools::Itertools;
use log::warn;
use nextclade::constants::REVERSE_COMPLEMENT_SUFFIX;
use nextclade::io::errors_csv::ErrorsCsvWriter;
use nextclade::io::fasta::{FastaPeptideWriter, FastaRecord, FastaWriter};
use nextclade::io::gene_map::GeneMap;
Expand Down Expand Up @@ -118,9 +119,16 @@ impl<'a> NextcladeOrderedWriter<'a> {
warnings,
insertions,
missing_genes,
is_reverse_complement,
..
} = &nextclade_outputs;

let seq_name = if *is_reverse_complement {
format!("{seq_name}{REVERSE_COMPLEMENT_SUFFIX}")
} else {
seq_name.clone()
};

if let Some(fasta_writer) = &mut self.fasta_writer {
fasta_writer.write(&seq_name, &from_nuc_seq(&qry_seq_stripped))?;
}
Expand Down
5 changes: 5 additions & 0 deletions packages_rs/nextclade-web/src/wasm/analyze.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ impl NextcladeParams {
#[wasm_bindgen]
#[derive(Clone, Serialize, Deserialize, TypescriptDefinition, Debug)]
pub struct AnalysisInput {
#[wasm_bindgen(getter_with_clone)]
pub qry_index: usize,

#[wasm_bindgen(getter_with_clone)]
pub qry_seq_name: String,

Expand Down Expand Up @@ -218,13 +221,15 @@ impl Nextclade {

pub fn run(&mut self, input: &AnalysisInput) -> Result<AnalysisResult, Report> {
let AnalysisInput {
qry_index,
qry_seq_name,
qry_seq_str,
} = input;

let qry_seq = &to_nuc_seq(qry_seq_str).wrap_err("When converting query sequence")?;

match nextclade_run_one(
*qry_index,
qry_seq_name,
qry_seq,
&self.ref_seq,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ async function analyze(record: FastaRecord): Promise<NextcladeResult> {
const { index, seqName, seq } = record

const input = AnalysisInput.from_js({
qry_index: index,
qry_seq_name: seqName,
qry_seq_str: seq,
})
Expand Down
70 changes: 42 additions & 28 deletions packages_rs/nextclade/src/align/align.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,30 @@ use crate::io::aa::Aa;
use crate::io::letter::Letter;
use crate::io::nuc::Nuc;
use crate::make_error;
use crate::translate::complement::reverse_complement_in_place;
use eyre::Report;
use log::trace;
use log::{info, trace, warn};

fn align_pairwise<T: Letter<T>>(
qry_seq: &[T],
ref_seq: &[T],
gap_open_close: &[i32],
params: &AlignPairwiseParams,
stripes: &[Stripe],
) -> Result<AlignmentOutput<T>, Report> {
) -> AlignmentOutput<T> {
trace!("Align pairwise: started. Params: {params:?}");

let max_indel = params.max_indel;

let ScoreMatrixResult { scores, paths } = score_matrix(qry_seq, ref_seq, gap_open_close, stripes, params);

Ok(backtrace(qry_seq, ref_seq, &scores, &paths))
backtrace(qry_seq, ref_seq, &scores, &paths)
}

/// align nucleotide sequences via seed alignment and banded smith watermann without penalizing terminal gaps
pub fn align_nuc(
index: usize,
seq_name: &str,
qry_seq: &[Nuc],
ref_seq: &[Nuc],
gap_open_close: &[i32],
Expand All @@ -42,13 +45,24 @@ pub fn align_nuc(
);
}

let stripes = seed_alignment(qry_seq, ref_seq, params)?;

let result = align_pairwise(qry_seq, ref_seq, gap_open_close, params, &stripes)?;

trace!("Score: {}", result.alignment_score);

Ok(result)
#[allow(clippy::map_err_ignore)]
match seed_alignment(qry_seq, ref_seq, params) {
Ok(stripes) => Ok(align_pairwise(qry_seq, ref_seq, gap_open_close, params, &stripes)),
Err(report) => {
if params.retry_reverse_complement {
info!("When processing sequence #{index} '{seq_name}': Seed matching failed. Retrying reverse complement");
let mut qry_seq = qry_seq.to_owned();
reverse_complement_in_place(&mut qry_seq);
let stripes = seed_alignment(&qry_seq, ref_seq, params).map_err(|_| report)?;
let mut result = align_pairwise(&qry_seq, ref_seq, gap_open_close, params, &stripes);
result.is_reverse_complement = true;
warn!("When processing sequence #{index} '{seq_name}': Sequence is reverse-complemented: Seed matching failed for the original sequence, but succeeded for its reverse complement. Outputs will be derived from the reverse complement and 'reverse complement' suffix will be added to sequence ID.");
Ok(result)
} else {
Err(report)
}
}
}
}

/// align amino acids using a fixed bandwidth banded alignment while penalizing terminal indels
Expand All @@ -59,7 +73,7 @@ pub fn align_aa(
params: &AlignPairwiseParams,
band_width: usize,
mean_shift: i32,
) -> Result<AlignmentOutput<Aa>, Report> {
) -> AlignmentOutput<Aa> {
let stripes = simple_stripes(mean_shift, band_width, ref_seq.len(), qry_seq.len());

align_pairwise(qry_seq, ref_seq, gap_open_close, params, &stripes)
Expand Down Expand Up @@ -120,7 +134,7 @@ mod tests {
let qry_seq = to_nuc_seq("ACGCTCGCT")?;
let ref_seq = to_nuc_seq("ACGCTCGCT")?;

let result = align_nuc(&qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?;
let result = align_nuc(0, "", &qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?;

assert_eq!(from_nuc_seq(&ref_seq), from_nuc_seq(&result.ref_seq));
assert_eq!(from_nuc_seq(&qry_seq), from_nuc_seq(&result.qry_seq));
Expand All @@ -134,7 +148,7 @@ mod tests {
let ref_seq = to_nuc_seq("ACGCTCGCT")?;
let qry_aln = to_nuc_seq("-CGCTCGCT")?;

let result = align_nuc(&qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?;
let result = align_nuc(0, "", &qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?;

assert_eq!(from_nuc_seq(&ref_seq), from_nuc_seq(&result.ref_seq));
assert_eq!(from_nuc_seq(&qry_aln), from_nuc_seq(&result.qry_seq));
Expand All @@ -148,7 +162,7 @@ mod tests {
let ref_seq = to_nuc_seq("ACGCTCGCT")?;
let qry_aln = to_nuc_seq("---CTCGCT")?;

let result = align_nuc(&qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?;
let result = align_nuc(0, "", &qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?;

assert_eq!(from_nuc_seq(&ref_seq), from_nuc_seq(&result.ref_seq));
assert_eq!(from_nuc_seq(&qry_aln), from_nuc_seq(&result.qry_seq));
Expand All @@ -163,7 +177,7 @@ mod tests {
let qry_aln = to_nuc_seq("-----TCCAATCA")?;
// ^

let result = align_nuc(&qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?;
let result = align_nuc(0, "", &qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?;

assert_eq!(from_nuc_seq(&ref_seq), from_nuc_seq(&result.ref_seq));
assert_eq!(from_nuc_seq(&qry_aln), from_nuc_seq(&result.qry_seq));
Expand All @@ -178,7 +192,7 @@ mod tests {
let qry_aln = to_nuc_seq("-----TGTTACCTGCGC")?;
// ^^

let result = align_nuc(&qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?;
let result = align_nuc(0, "", &qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?;

assert_eq!(from_nuc_seq(&ref_seq), from_nuc_seq(&result.ref_seq));
assert_eq!(from_nuc_seq(&qry_aln), from_nuc_seq(&result.qry_seq));
Expand All @@ -192,7 +206,7 @@ mod tests {
let ref_seq = to_nuc_seq("ACGCTCGCT")?;
let qry_aln = to_nuc_seq("ACGCTC---")?;

let result = align_nuc(&qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?;
let result = align_nuc(0, "", &qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?;

assert_eq!(from_nuc_seq(&ref_seq), from_nuc_seq(&result.ref_seq));
assert_eq!(from_nuc_seq(&qry_aln), from_nuc_seq(&result.qry_seq));
Expand All @@ -207,7 +221,7 @@ mod tests {
let qry_aln = to_nuc_seq("CCAATCAT-----")?;
// ^

let result = align_nuc(&qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?;
let result = align_nuc(0, "", &qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?;

assert_eq!(from_nuc_seq(&ref_seq), from_nuc_seq(&result.ref_seq));
assert_eq!(from_nuc_seq(&qry_aln), from_nuc_seq(&result.qry_seq));
Expand All @@ -222,7 +236,7 @@ mod tests {
let qry_aln = to_nuc_seq("CCGATCAT-----")?;
// ^ ^

let result = align_nuc(&qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?;
let result = align_nuc(0, "", &qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?;

assert_eq!(from_nuc_seq(&ref_seq), from_nuc_seq(&result.ref_seq));
assert_eq!(from_nuc_seq(&qry_aln), from_nuc_seq(&result.qry_seq));
Expand All @@ -236,7 +250,7 @@ mod tests {
let ref_seq = to_nuc_seq("GCCACGCTCGCT")?;
let qry_aln = to_nuc_seq("---ACGCTC---")?;

let result = align_nuc(&qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?;
let result = align_nuc(0, "", &qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?;

assert_eq!(from_nuc_seq(&ref_seq), from_nuc_seq(&result.ref_seq));
assert_eq!(from_nuc_seq(&qry_aln), from_nuc_seq(&result.qry_seq));
Expand All @@ -250,7 +264,7 @@ mod tests {
let ref_seq = to_nuc_seq("ACGCTC")?;
let ref_aln = to_nuc_seq("---ACGCTC---")?;

let result = align_nuc(&qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?;
let result = align_nuc(0, "", &qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?;

assert_eq!(from_nuc_seq(&ref_aln), from_nuc_seq(&result.ref_seq));
assert_eq!(from_nuc_seq(&qry_seq), from_nuc_seq(&result.qry_seq));
Expand All @@ -264,7 +278,7 @@ mod tests {
let ref_seq = to_nuc_seq("GCCACGCTCGCT")?;
let qry_aln = to_nuc_seq("GCCA--CTCCCT")?;

let result = align_nuc(&qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?;
let result = align_nuc(0, "", &qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?;

// assert_eq!(18, result.alignment_score);
assert_eq!(from_nuc_seq(&ref_seq), from_nuc_seq(&result.ref_seq));
Expand All @@ -279,7 +293,7 @@ mod tests {
let ref_seq = to_nuc_seq("GCCACTCGCT")?;
let ref_aln = to_nuc_seq("GCCA--CTCGCT")?;

let result = align_nuc(&qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?;
let result = align_nuc(0, "", &qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?;

assert_eq!(from_nuc_seq(&ref_aln), from_nuc_seq(&result.ref_seq));
assert_eq!(from_nuc_seq(&qry_seq), from_nuc_seq(&result.qry_seq));
Expand All @@ -293,7 +307,7 @@ mod tests {
let ref_seq = to_nuc_seq("ACATATACTTC")?;
let qry_aln = to_nuc_seq("ACAT---CTTC")?;

let result = align_nuc(&qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?;
let result = align_nuc(0, "", &qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?;

assert_eq!(from_nuc_seq(&ref_seq), from_nuc_seq(&result.ref_seq));
assert_eq!(from_nuc_seq(&qry_aln), from_nuc_seq(&result.qry_seq));
Expand All @@ -307,7 +321,7 @@ mod tests {
let ref_seq = to_nuc_seq("ACATCTTG")?;
let ref_aln = to_nuc_seq("ACAT---CTTG")?;

let result = align_nuc(&qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?;
let result = align_nuc(0, "", &qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?;

assert_eq!(from_nuc_seq(&ref_aln), from_nuc_seq(&result.ref_seq));
assert_eq!(from_nuc_seq(&qry_seq), from_nuc_seq(&result.qry_seq));
Expand All @@ -322,7 +336,7 @@ mod tests {
let qry_aln = to_nuc_seq("AAAAAAAAAAAA----------")?;
let ref_aln = to_nuc_seq("---------AAATTTTTTTTTT")?;

let result = align_nuc(&qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?;
let result = align_nuc(0, "", &qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?;

assert_eq!(from_nuc_seq(&ref_aln), from_nuc_seq(&result.ref_seq));
assert_eq!(from_nuc_seq(&qry_aln), from_nuc_seq(&result.qry_seq));
Expand All @@ -337,7 +351,7 @@ mod tests {
let ref_aln = to_nuc_seq("AAAAAAAAAAAA----------")?;
let qry_aln = to_nuc_seq("---------AAATTTTTTTTTT")?;

let result = align_nuc(&qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?;
let result = align_nuc(0, "", &qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?;

assert_eq!(from_nuc_seq(&ref_aln), from_nuc_seq(&result.ref_seq));
assert_eq!(from_nuc_seq(&qry_aln), from_nuc_seq(&result.qry_seq));
Expand All @@ -352,7 +366,7 @@ mod tests {
let ref_aln = to_nuc_seq("CTTGGAGGTTCCGTG----GCTAGATAACAGAACATTCTTGGAATGCTGATCTTTATAAGCTCATGCGACACTTCGCATGGTG---AGCCTTTGT")?;
let qry_aln = to_nuc_seq("CTTGGAGGTTCCGTGGCTATAAAGATAACAGAACATTCTTGGAATGCTGATC-----AAGCTCATGGGACANNNNNCATGGTGGACAGCCTTTGT")?;

let result = align_nuc(&qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?;
let result = align_nuc(0, "", &qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?;

assert_eq!(from_nuc_seq(&ref_aln), from_nuc_seq(&result.ref_seq));
assert_eq!(from_nuc_seq(&qry_aln), from_nuc_seq(&result.qry_seq));
Expand Down
3 changes: 3 additions & 0 deletions packages_rs/nextclade/src/align/backtrace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ pub struct AlignmentOutput<T> {
pub qry_seq: Vec<T>,
pub ref_seq: Vec<T>,
pub alignment_score: i32,
pub is_reverse_complement: bool,
}

pub fn backtrace<T: Letter<T>>(
Expand Down Expand Up @@ -89,6 +90,7 @@ pub fn backtrace<T: Letter<T>>(
qry_seq: aln_qry,
ref_seq: aln_ref,
alignment_score: scores[(num_rows - 1, num_cols - 1)],
is_reverse_complement: false,
}
}

Expand Down Expand Up @@ -156,6 +158,7 @@ mod tests {
qry_seq: to_nuc_seq("---CTCGCT")?,
ref_seq: to_nuc_seq("ACGCTCGCT")?,
alignment_score: 18,
is_reverse_complement: false
};

let output = backtrace(&qry_seq, &ref_seq, &scores, &paths);
Expand Down
Loading

1 comment on commit 78d2156

@vercel
Copy link

@vercel vercel bot commented on 78d2156 Jun 25, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

nextclade – ./

nextclade-git-master-nextstrain.vercel.app
nextclade.vercel.app
nextclade-nextstrain.vercel.app

Please sign in to comment.