diff --git a/packages_rs/nextclade-cli/src/cli/nextalign_loop.rs b/packages_rs/nextclade-cli/src/cli/nextalign_loop.rs index 71304276b..241c66052 100644 --- a/packages_rs/nextclade-cli/src/cli/nextalign_loop.rs +++ b/packages_rs/nextclade-cli/src/cli/nextalign_loop.rs @@ -115,6 +115,8 @@ pub fn nextalign_run(run_args: NextalignRunArgs) -> Result<(), Report> { .wrap_err_with(|| format!("When processing sequence #{index} '{seq_name}'")) .and_then(|qry_seq| { nextalign_run_one( + index, + &seq_name, &qry_seq, ref_seq, ref_peptides, diff --git a/packages_rs/nextclade-cli/src/cli/nextalign_ordered_writer.rs b/packages_rs/nextclade-cli/src/cli/nextalign_ordered_writer.rs index 49f4ff6e4..3052600eb 100644 --- a/packages_rs/nextclade-cli/src/cli/nextalign_ordered_writer.rs +++ b/packages_rs/nextclade-cli/src/cli/nextalign_ordered_writer.rs @@ -1,6 +1,7 @@ use crate::cli::nextalign_loop::NextalignRecord; use eyre::{Report, WrapErr}; use log::warn; +use nextclade::constants::REVERSE_COMPLEMENT_SUFFIX; use nextclade::io::errors_csv::ErrorsCsvWriter; use nextclade::io::fasta::{FastaPeptideWriter, FastaRecord, FastaWriter}; use nextclade::io::gene_map::GeneMap; @@ -87,24 +88,31 @@ impl<'a> NextalignOrderedWriter<'a> { translations, warnings, missing_genes, + is_reverse_complement, } = output; + let seq_name = if *is_reverse_complement { + format!("{seq_name}{REVERSE_COMPLEMENT_SUFFIX}") + } else { + seq_name.clone() + }; + if let Some(fasta_writer) = &mut self.fasta_writer { - fasta_writer.write(seq_name, &from_nuc_seq(&stripped.qry_seq))?; + fasta_writer.write(&seq_name, &from_nuc_seq(&stripped.qry_seq))?; } if let Some(fasta_peptide_writer) = &mut self.fasta_peptide_writer { for translation in translations { - fasta_peptide_writer.write(seq_name, translation)?; + fasta_peptide_writer.write(&seq_name, translation)?; } } if let Some(insertions_csv_writer) = &mut self.insertions_csv_writer { - insertions_csv_writer.write(seq_name, &stripped.insertions, translations)?; + insertions_csv_writer.write(&seq_name, &stripped.insertions, translations)?; } if let Some(errors_csv_writer) = &mut self.errors_csv_writer { - errors_csv_writer.write_aa_errors(seq_name, warnings, missing_genes)?; + errors_csv_writer.write_aa_errors(&seq_name, warnings, missing_genes)?; } } Err(report) => { diff --git a/packages_rs/nextclade-cli/src/cli/nextclade_loop.rs b/packages_rs/nextclade-cli/src/cli/nextclade_loop.rs index f2867b4ce..9c08e450e 100644 --- a/packages_rs/nextclade-cli/src/cli/nextclade_loop.rs +++ b/packages_rs/nextclade-cli/src/cli/nextclade_loop.rs @@ -178,6 +178,7 @@ pub fn nextclade_run(run_args: NextcladeRunArgs) -> Result<(), Report> { .wrap_err_with(|| format!("When processing sequence #{index} '{seq_name}'")) .and_then(|qry_seq| { nextclade_run_one( + index, &seq_name, &qry_seq, ref_seq, diff --git a/packages_rs/nextclade-cli/src/cli/nextclade_ordered_writer.rs b/packages_rs/nextclade-cli/src/cli/nextclade_ordered_writer.rs index 934294d49..3b5a064d4 100644 --- a/packages_rs/nextclade-cli/src/cli/nextclade_ordered_writer.rs +++ b/packages_rs/nextclade-cli/src/cli/nextclade_ordered_writer.rs @@ -2,6 +2,7 @@ use crate::cli::nextclade_loop::NextcladeRecord; use eyre::{Report, WrapErr}; use itertools::Itertools; use log::warn; +use nextclade::constants::REVERSE_COMPLEMENT_SUFFIX; use nextclade::io::errors_csv::ErrorsCsvWriter; use nextclade::io::fasta::{FastaPeptideWriter, FastaRecord, FastaWriter}; use nextclade::io::gene_map::GeneMap; @@ -118,9 +119,16 @@ impl<'a> NextcladeOrderedWriter<'a> { warnings, insertions, missing_genes, + is_reverse_complement, .. } = &nextclade_outputs; + let seq_name = if *is_reverse_complement { + format!("{seq_name}{REVERSE_COMPLEMENT_SUFFIX}") + } else { + seq_name.clone() + }; + if let Some(fasta_writer) = &mut self.fasta_writer { fasta_writer.write(&seq_name, &from_nuc_seq(&qry_seq_stripped))?; } diff --git a/packages_rs/nextclade-web/src/wasm/analyze.rs b/packages_rs/nextclade-web/src/wasm/analyze.rs index cd60aeaed..c20d92d39 100644 --- a/packages_rs/nextclade-web/src/wasm/analyze.rs +++ b/packages_rs/nextclade-web/src/wasm/analyze.rs @@ -76,6 +76,9 @@ impl NextcladeParams { #[wasm_bindgen] #[derive(Clone, Serialize, Deserialize, TypescriptDefinition, Debug)] pub struct AnalysisInput { + #[wasm_bindgen(getter_with_clone)] + pub qry_index: usize, + #[wasm_bindgen(getter_with_clone)] pub qry_seq_name: String, @@ -218,6 +221,7 @@ impl Nextclade { pub fn run(&mut self, input: &AnalysisInput) -> Result { let AnalysisInput { + qry_index, qry_seq_name, qry_seq_str, } = input; @@ -225,6 +229,7 @@ impl Nextclade { let qry_seq = &to_nuc_seq(qry_seq_str).wrap_err("When converting query sequence")?; match nextclade_run_one( + *qry_index, qry_seq_name, qry_seq, &self.ref_seq, diff --git a/packages_rs/nextclade-web/src/workers/nextcladeWasm.worker.ts b/packages_rs/nextclade-web/src/workers/nextcladeWasm.worker.ts index 1e43165b8..e22f2f6cc 100644 --- a/packages_rs/nextclade-web/src/workers/nextcladeWasm.worker.ts +++ b/packages_rs/nextclade-web/src/workers/nextcladeWasm.worker.ts @@ -90,6 +90,7 @@ async function analyze(record: FastaRecord): Promise { const { index, seqName, seq } = record const input = AnalysisInput.from_js({ + qry_index: index, qry_seq_name: seqName, qry_seq_str: seq, }) diff --git a/packages_rs/nextclade/src/align/align.rs b/packages_rs/nextclade/src/align/align.rs index 5378f8ddc..83f7d9173 100644 --- a/packages_rs/nextclade/src/align/align.rs +++ b/packages_rs/nextclade/src/align/align.rs @@ -8,8 +8,9 @@ use crate::io::aa::Aa; use crate::io::letter::Letter; use crate::io::nuc::Nuc; use crate::make_error; +use crate::translate::complement::reverse_complement_in_place; use eyre::Report; -use log::trace; +use log::{info, trace, warn}; fn align_pairwise>( qry_seq: &[T], @@ -17,18 +18,20 @@ fn align_pairwise>( gap_open_close: &[i32], params: &AlignPairwiseParams, stripes: &[Stripe], -) -> Result, Report> { +) -> AlignmentOutput { trace!("Align pairwise: started. Params: {params:?}"); let max_indel = params.max_indel; let ScoreMatrixResult { scores, paths } = score_matrix(qry_seq, ref_seq, gap_open_close, stripes, params); - Ok(backtrace(qry_seq, ref_seq, &scores, &paths)) + backtrace(qry_seq, ref_seq, &scores, &paths) } /// align nucleotide sequences via seed alignment and banded smith watermann without penalizing terminal gaps pub fn align_nuc( + index: usize, + seq_name: &str, qry_seq: &[Nuc], ref_seq: &[Nuc], gap_open_close: &[i32], @@ -42,13 +45,24 @@ pub fn align_nuc( ); } - let stripes = seed_alignment(qry_seq, ref_seq, params)?; - - let result = align_pairwise(qry_seq, ref_seq, gap_open_close, params, &stripes)?; - - trace!("Score: {}", result.alignment_score); - - Ok(result) + #[allow(clippy::map_err_ignore)] + match seed_alignment(qry_seq, ref_seq, params) { + Ok(stripes) => Ok(align_pairwise(qry_seq, ref_seq, gap_open_close, params, &stripes)), + Err(report) => { + if params.retry_reverse_complement { + info!("When processing sequence #{index} '{seq_name}': Seed matching failed. Retrying reverse complement"); + let mut qry_seq = qry_seq.to_owned(); + reverse_complement_in_place(&mut qry_seq); + let stripes = seed_alignment(&qry_seq, ref_seq, params).map_err(|_| report)?; + let mut result = align_pairwise(&qry_seq, ref_seq, gap_open_close, params, &stripes); + result.is_reverse_complement = true; + warn!("When processing sequence #{index} '{seq_name}': Sequence is reverse-complemented: Seed matching failed for the original sequence, but succeeded for its reverse complement. Outputs will be derived from the reverse complement and 'reverse complement' suffix will be added to sequence ID."); + Ok(result) + } else { + Err(report) + } + } + } } /// align amino acids using a fixed bandwidth banded alignment while penalizing terminal indels @@ -59,7 +73,7 @@ pub fn align_aa( params: &AlignPairwiseParams, band_width: usize, mean_shift: i32, -) -> Result, Report> { +) -> AlignmentOutput { let stripes = simple_stripes(mean_shift, band_width, ref_seq.len(), qry_seq.len()); align_pairwise(qry_seq, ref_seq, gap_open_close, params, &stripes) @@ -120,7 +134,7 @@ mod tests { let qry_seq = to_nuc_seq("ACGCTCGCT")?; let ref_seq = to_nuc_seq("ACGCTCGCT")?; - let result = align_nuc(&qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?; + let result = align_nuc(0, "", &qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?; assert_eq!(from_nuc_seq(&ref_seq), from_nuc_seq(&result.ref_seq)); assert_eq!(from_nuc_seq(&qry_seq), from_nuc_seq(&result.qry_seq)); @@ -134,7 +148,7 @@ mod tests { let ref_seq = to_nuc_seq("ACGCTCGCT")?; let qry_aln = to_nuc_seq("-CGCTCGCT")?; - let result = align_nuc(&qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?; + let result = align_nuc(0, "", &qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?; assert_eq!(from_nuc_seq(&ref_seq), from_nuc_seq(&result.ref_seq)); assert_eq!(from_nuc_seq(&qry_aln), from_nuc_seq(&result.qry_seq)); @@ -148,7 +162,7 @@ mod tests { let ref_seq = to_nuc_seq("ACGCTCGCT")?; let qry_aln = to_nuc_seq("---CTCGCT")?; - let result = align_nuc(&qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?; + let result = align_nuc(0, "", &qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?; assert_eq!(from_nuc_seq(&ref_seq), from_nuc_seq(&result.ref_seq)); assert_eq!(from_nuc_seq(&qry_aln), from_nuc_seq(&result.qry_seq)); @@ -163,7 +177,7 @@ mod tests { let qry_aln = to_nuc_seq("-----TCCAATCA")?; // ^ - let result = align_nuc(&qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?; + let result = align_nuc(0, "", &qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?; assert_eq!(from_nuc_seq(&ref_seq), from_nuc_seq(&result.ref_seq)); assert_eq!(from_nuc_seq(&qry_aln), from_nuc_seq(&result.qry_seq)); @@ -178,7 +192,7 @@ mod tests { let qry_aln = to_nuc_seq("-----TGTTACCTGCGC")?; // ^^ - let result = align_nuc(&qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?; + let result = align_nuc(0, "", &qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?; assert_eq!(from_nuc_seq(&ref_seq), from_nuc_seq(&result.ref_seq)); assert_eq!(from_nuc_seq(&qry_aln), from_nuc_seq(&result.qry_seq)); @@ -192,7 +206,7 @@ mod tests { let ref_seq = to_nuc_seq("ACGCTCGCT")?; let qry_aln = to_nuc_seq("ACGCTC---")?; - let result = align_nuc(&qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?; + let result = align_nuc(0, "", &qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?; assert_eq!(from_nuc_seq(&ref_seq), from_nuc_seq(&result.ref_seq)); assert_eq!(from_nuc_seq(&qry_aln), from_nuc_seq(&result.qry_seq)); @@ -207,7 +221,7 @@ mod tests { let qry_aln = to_nuc_seq("CCAATCAT-----")?; // ^ - let result = align_nuc(&qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?; + let result = align_nuc(0, "", &qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?; assert_eq!(from_nuc_seq(&ref_seq), from_nuc_seq(&result.ref_seq)); assert_eq!(from_nuc_seq(&qry_aln), from_nuc_seq(&result.qry_seq)); @@ -222,7 +236,7 @@ mod tests { let qry_aln = to_nuc_seq("CCGATCAT-----")?; // ^ ^ - let result = align_nuc(&qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?; + let result = align_nuc(0, "", &qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?; assert_eq!(from_nuc_seq(&ref_seq), from_nuc_seq(&result.ref_seq)); assert_eq!(from_nuc_seq(&qry_aln), from_nuc_seq(&result.qry_seq)); @@ -236,7 +250,7 @@ mod tests { let ref_seq = to_nuc_seq("GCCACGCTCGCT")?; let qry_aln = to_nuc_seq("---ACGCTC---")?; - let result = align_nuc(&qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?; + let result = align_nuc(0, "", &qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?; assert_eq!(from_nuc_seq(&ref_seq), from_nuc_seq(&result.ref_seq)); assert_eq!(from_nuc_seq(&qry_aln), from_nuc_seq(&result.qry_seq)); @@ -250,7 +264,7 @@ mod tests { let ref_seq = to_nuc_seq("ACGCTC")?; let ref_aln = to_nuc_seq("---ACGCTC---")?; - let result = align_nuc(&qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?; + let result = align_nuc(0, "", &qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?; assert_eq!(from_nuc_seq(&ref_aln), from_nuc_seq(&result.ref_seq)); assert_eq!(from_nuc_seq(&qry_seq), from_nuc_seq(&result.qry_seq)); @@ -264,7 +278,7 @@ mod tests { let ref_seq = to_nuc_seq("GCCACGCTCGCT")?; let qry_aln = to_nuc_seq("GCCA--CTCCCT")?; - let result = align_nuc(&qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?; + let result = align_nuc(0, "", &qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?; // assert_eq!(18, result.alignment_score); assert_eq!(from_nuc_seq(&ref_seq), from_nuc_seq(&result.ref_seq)); @@ -279,7 +293,7 @@ mod tests { let ref_seq = to_nuc_seq("GCCACTCGCT")?; let ref_aln = to_nuc_seq("GCCA--CTCGCT")?; - let result = align_nuc(&qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?; + let result = align_nuc(0, "", &qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?; assert_eq!(from_nuc_seq(&ref_aln), from_nuc_seq(&result.ref_seq)); assert_eq!(from_nuc_seq(&qry_seq), from_nuc_seq(&result.qry_seq)); @@ -293,7 +307,7 @@ mod tests { let ref_seq = to_nuc_seq("ACATATACTTC")?; let qry_aln = to_nuc_seq("ACAT---CTTC")?; - let result = align_nuc(&qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?; + let result = align_nuc(0, "", &qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?; assert_eq!(from_nuc_seq(&ref_seq), from_nuc_seq(&result.ref_seq)); assert_eq!(from_nuc_seq(&qry_aln), from_nuc_seq(&result.qry_seq)); @@ -307,7 +321,7 @@ mod tests { let ref_seq = to_nuc_seq("ACATCTTG")?; let ref_aln = to_nuc_seq("ACAT---CTTG")?; - let result = align_nuc(&qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?; + let result = align_nuc(0, "", &qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?; assert_eq!(from_nuc_seq(&ref_aln), from_nuc_seq(&result.ref_seq)); assert_eq!(from_nuc_seq(&qry_seq), from_nuc_seq(&result.qry_seq)); @@ -322,7 +336,7 @@ mod tests { let qry_aln = to_nuc_seq("AAAAAAAAAAAA----------")?; let ref_aln = to_nuc_seq("---------AAATTTTTTTTTT")?; - let result = align_nuc(&qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?; + let result = align_nuc(0, "", &qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?; assert_eq!(from_nuc_seq(&ref_aln), from_nuc_seq(&result.ref_seq)); assert_eq!(from_nuc_seq(&qry_aln), from_nuc_seq(&result.qry_seq)); @@ -337,7 +351,7 @@ mod tests { let ref_aln = to_nuc_seq("AAAAAAAAAAAA----------")?; let qry_aln = to_nuc_seq("---------AAATTTTTTTTTT")?; - let result = align_nuc(&qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?; + let result = align_nuc(0, "", &qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?; assert_eq!(from_nuc_seq(&ref_aln), from_nuc_seq(&result.ref_seq)); assert_eq!(from_nuc_seq(&qry_aln), from_nuc_seq(&result.qry_seq)); @@ -352,7 +366,7 @@ mod tests { let ref_aln = to_nuc_seq("CTTGGAGGTTCCGTG----GCTAGATAACAGAACATTCTTGGAATGCTGATCTTTATAAGCTCATGCGACACTTCGCATGGTG---AGCCTTTGT")?; let qry_aln = to_nuc_seq("CTTGGAGGTTCCGTGGCTATAAAGATAACAGAACATTCTTGGAATGCTGATC-----AAGCTCATGGGACANNNNNCATGGTGGACAGCCTTTGT")?; - let result = align_nuc(&qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?; + let result = align_nuc(0, "", &qry_seq, &ref_seq, &ctx.gap_open_close, &ctx.params)?; assert_eq!(from_nuc_seq(&ref_aln), from_nuc_seq(&result.ref_seq)); assert_eq!(from_nuc_seq(&qry_aln), from_nuc_seq(&result.qry_seq)); diff --git a/packages_rs/nextclade/src/align/backtrace.rs b/packages_rs/nextclade/src/align/backtrace.rs index ae60a0962..fb53c9ef2 100644 --- a/packages_rs/nextclade/src/align/backtrace.rs +++ b/packages_rs/nextclade/src/align/backtrace.rs @@ -14,6 +14,7 @@ pub struct AlignmentOutput { pub qry_seq: Vec, pub ref_seq: Vec, pub alignment_score: i32, + pub is_reverse_complement: bool, } pub fn backtrace>( @@ -89,6 +90,7 @@ pub fn backtrace>( qry_seq: aln_qry, ref_seq: aln_ref, alignment_score: scores[(num_rows - 1, num_cols - 1)], + is_reverse_complement: false, } } @@ -156,6 +158,7 @@ mod tests { qry_seq: to_nuc_seq("---CTCGCT")?, ref_seq: to_nuc_seq("ACGCTCGCT")?, alignment_score: 18, + is_reverse_complement: false }; let output = backtrace(&qry_seq, &ref_seq, &scores, &paths); diff --git a/packages_rs/nextclade/src/align/params.rs b/packages_rs/nextclade/src/align/params.rs index 4ee255424..a27575346 100644 --- a/packages_rs/nextclade/src/align/params.rs +++ b/packages_rs/nextclade/src/align/params.rs @@ -13,6 +13,7 @@ pub enum GapAlignmentSide { // as well as adds a method `.merge_opt(&opt)` to the original struct, which merges values from the optional counterpart // into self (mutably). +#[allow(clippy::struct_excessive_bools)] #[optfield(pub AlignPairwiseParamsOptional, attrs, doc, field_attrs, field_doc, merge_fn = pub)] #[derive(Parser, Debug, Clone, Serialize, Deserialize)] pub struct AlignPairwiseParams { @@ -70,8 +71,12 @@ pub struct AlignPairwiseParams { #[clap(long)] pub seed_spacing: i32, + /// Retry seed matching step with a reverse complement if the first attempt failed + #[clap(long, takes_value = false, forbid_empty_values = false, default_missing_value = "true")] + pub retry_reverse_complement: bool, + /// If this flag is present, the amino acid sequences will be truncated at the first stop codon, if mutations or sequencing errors cause premature stop codons to be present. No amino acid mutations in the truncated region will be recorded. - #[clap(long)] + #[clap(long, takes_value = false, forbid_empty_values = false, default_missing_value = "true")] pub no_translate_past_stop: bool, // Internal alignment parameter @@ -112,6 +117,7 @@ impl Default for AlignPairwiseParams { min_match_rate: 0.3, seed_spacing: 100, mismatches_allowed: 3, + retry_reverse_complement: false, no_translate_past_stop: false, left_terminal_gaps_free: true, right_terminal_gaps_free: true, diff --git a/packages_rs/nextclade/src/constants.rs b/packages_rs/nextclade/src/constants.rs new file mode 100644 index 000000000..b102b539f --- /dev/null +++ b/packages_rs/nextclade/src/constants.rs @@ -0,0 +1 @@ +pub const REVERSE_COMPLEMENT_SUFFIX: &str = " |(reverse complement)"; diff --git a/packages_rs/nextclade/src/io/nextclade_csv.rs b/packages_rs/nextclade/src/io/nextclade_csv.rs index c39b3b198..e5562768a 100644 --- a/packages_rs/nextclade/src/io/nextclade_csv.rs +++ b/packages_rs/nextclade/src/io/nextclade_csv.rs @@ -86,6 +86,7 @@ static NEXTCLADE_CSV_HEADERS: &[&str] = &[ "qc.stopCodons.totalStopCodons", "qc.stopCodons.score", "qc.stopCodons.status", + "isReverseComplement", // "failedGenes", // "warnings", "errors", @@ -169,6 +170,7 @@ impl NextcladeResultsCsvWriter { // divergence, qc, custom_node_attributes, + is_reverse_complement, .. } = nextclade_outputs; @@ -365,6 +367,7 @@ impl NextcladeResultsCsvWriter { "qc.stopCodons.status", qc.stop_codons.as_ref().map(|sc| sc.status.to_string()), )?; + self.add_entry("isReverseComplement", &is_reverse_complement.to_string())?; // self.add_entry("failedGenes", &format_failed_genes(missing_genes, ARRAY_ITEM_DELIMITER))?; // self.add_entry("warnings", &format_aa_warnings(translations, ARRAY_ITEM_DELIMITER))?; self.add_entry("errors", &"")?; diff --git a/packages_rs/nextclade/src/lib.rs b/packages_rs/nextclade/src/lib.rs index 058de692f..744d3c773 100644 --- a/packages_rs/nextclade/src/lib.rs +++ b/packages_rs/nextclade/src/lib.rs @@ -1,5 +1,6 @@ pub mod align; pub mod analyze; +pub mod constants; pub mod gene; pub mod io; pub mod qc; diff --git a/packages_rs/nextclade/src/run/nextalign_run_one.rs b/packages_rs/nextclade/src/run/nextalign_run_one.rs index cecf66492..7094598b3 100644 --- a/packages_rs/nextclade/src/run/nextalign_run_one.rs +++ b/packages_rs/nextclade/src/run/nextalign_run_one.rs @@ -9,8 +9,11 @@ use crate::utils::error::report_to_string; use eyre::Report; use itertools::{Either, Itertools}; use std::collections::HashSet; +use std::fmt::format; pub fn nextalign_run_one( + index: usize, + seq_name: &str, qry_seq: &[Nuc], ref_seq: &[Nuc], ref_peptides: &TranslationMap, @@ -19,7 +22,7 @@ pub fn nextalign_run_one( gap_open_close_aa: &[i32], params: &AlignPairwiseParams, ) -> Result { - match align_nuc(qry_seq, ref_seq, gap_open_close_nuc, params) { + match align_nuc(index, seq_name, qry_seq, ref_seq, gap_open_close_nuc, params) { Err(report) => Err(report), Ok(alignment) => { @@ -34,7 +37,7 @@ pub fn nextalign_run_one( let stripped = insertions_strip(&alignment.qry_seq, &alignment.ref_seq); - let (translations, warnings): (Vec, Vec) = + let (translations, mut warnings): (Vec, Vec) = translations.into_iter().partition_map(|(gene_name, res)| match res { Ok(tr) => Either::Left(tr), Err(err) => Either::Right(PeptideWarning { @@ -51,12 +54,22 @@ pub fn nextalign_run_one( .cloned() .collect_vec(); + let is_reverse_complement = alignment.is_reverse_complement; + + if is_reverse_complement { + warnings.push(PeptideWarning { + gene_name: "nuc".to_owned(), + warning: format!("When processing sequence #{index} '{seq_name}': Sequence is reverse-complemented: Seed matching failed for the original sequence, but succeeded for its reverse complement. Outputs will be derived from the reverse complement and 'reverse complement' suffix will be added to sequence ID.") + }); + } + Ok(NextalignOutputs { stripped, alignment, translations, warnings, missing_genes, + is_reverse_complement, }) } } diff --git a/packages_rs/nextclade/src/run/nextclade_run_one.rs b/packages_rs/nextclade/src/run/nextclade_run_one.rs index 518b55441..6536b5c7b 100644 --- a/packages_rs/nextclade/src/run/nextclade_run_one.rs +++ b/packages_rs/nextclade/src/run/nextclade_run_one.rs @@ -12,6 +12,7 @@ use crate::analyze::nuc_changes::{find_nuc_changes, FindNucChangesOutput}; use crate::analyze::pcr_primer_changes::get_pcr_primer_changes; use crate::analyze::pcr_primers::PcrPrimer; use crate::analyze::virus_properties::VirusProperties; +use crate::constants::REVERSE_COMPLEMENT_SUFFIX; use crate::io::aa::Aa; use crate::io::gene_map::GeneMap; use crate::io::letter::Letter; @@ -27,6 +28,7 @@ use crate::types::outputs::{NextalignOutputs, NextcladeOutputs}; use eyre::Report; pub fn nextclade_run_one( + index: usize, seq_name: &str, qry_seq: &[Nuc], ref_seq: &[Nuc], @@ -46,7 +48,10 @@ pub fn nextclade_run_one( translations, warnings, missing_genes, + is_reverse_complement, } = nextalign_run_one( + index, + seq_name, qry_seq, ref_seq, ref_peptides, @@ -154,11 +159,18 @@ pub fn nextclade_run_one( qc_config, ); + let seq_name = if is_reverse_complement { + format!("{seq_name}{REVERSE_COMPLEMENT_SUFFIX}") + } else { + seq_name.to_owned() + }; + Ok(( stripped.qry_seq, translations, NextcladeOutputs { - seq_name: seq_name.to_owned(), + index, + seq_name, substitutions, total_substitutions, deletions, @@ -195,6 +207,7 @@ pub fn nextclade_run_one( qc, custom_node_attributes: clade_node_attrs, nearest_node_id, + is_reverse_complement, }, )) } diff --git a/packages_rs/nextclade/src/translate/translate_genes.rs b/packages_rs/nextclade/src/translate/translate_genes.rs index 2c019c7d2..a3292d8a8 100644 --- a/packages_rs/nextclade/src/translate/translate_genes.rs +++ b/packages_rs/nextclade/src/translate/translate_genes.rs @@ -176,7 +176,7 @@ pub fn translate_gene( &aa_params, band_width, mean_shift, - )?; + ); let mut stripped = insertions_strip(&alignment.qry_seq, &alignment.ref_seq); diff --git a/packages_rs/nextclade/src/types/outputs.rs b/packages_rs/nextclade/src/types/outputs.rs index 8b34a6067..09f98d2be 100644 --- a/packages_rs/nextclade/src/types/outputs.rs +++ b/packages_rs/nextclade/src/types/outputs.rs @@ -31,11 +31,13 @@ pub struct NextalignOutputs { pub translations: Vec, pub warnings: Vec, pub missing_genes: Vec, + pub is_reverse_complement: bool, } #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct NextcladeOutputs { + pub index: usize, pub seq_name: String, pub substitutions: Vec, pub total_substitutions: usize, @@ -75,6 +77,7 @@ pub struct NextcladeOutputs { pub qc: QcResult, pub custom_node_attributes: BTreeMap, pub nearest_node_id: usize, + pub is_reverse_complement: bool, } impl NextcladeOutputs {