Skip to content

Commit

Permalink
Merge pull request #1377 from nextstrain/feat/gap-alignment-left
Browse files Browse the repository at this point in the history
  • Loading branch information
ivan-aksamentov authored Jan 14, 2024
2 parents 93a7146 + f77cd51 commit 5778fa6
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 6 deletions.
2 changes: 1 addition & 1 deletion docs/user/nextclade-cli/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ For short help type: `nextclade -h`, for extended help type: `nextclade --help`.

* `--excess-bandwidth <EXCESS_BANDWIDTH>` — Excess bandwidth for internal stripes
* `--terminal-bandwidth <TERMINAL_BANDWIDTH>` — Excess bandwidth for terminal stripes
* `--gap-alignment-side <GAP_ALIGNMENT_SIDE>` — Whether to align gaps on the left or right side if equally parsimonious. Left aligning gaps is the convention, right align is Nextclade's historic default
* `--gap-alignment-side <GAP_ALIGNMENT_SIDE>` — Whether to align gaps on the left or right side if equally parsimonious. Default: left

Possible values: `left`, `right`

Expand Down
11 changes: 8 additions & 3 deletions packages_rs/nextclade/src/align/params.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@ pub enum GapAlignmentSide {
Right,
}

impl Default for GapAlignmentSide {
fn default() -> Self {
Self::Left
}
}

// NOTE: The `optfield` attribute creates a struct that has the same fields, but with each of them wrapped into `Option`.
// It also adds a method `.merge_opt(&opt)` to the original struct, which merges values from the optional counterpart
// into self (mutably).
Expand Down Expand Up @@ -82,8 +88,7 @@ pub struct AlignPairwiseParams {
#[clap(long)]
pub terminal_bandwidth: i32,

/// Whether to align gaps on the left or right side if equally parsimonious.
/// Left aligning gaps is the convention, right align is Nextclade's historic default
/// Whether to align gaps on the left or right side if equally parsimonious. Default: left
#[clap(long, value_enum)]
pub gap_alignment_side: GapAlignmentSide,

Expand Down Expand Up @@ -158,7 +163,7 @@ impl Default for AlignPairwiseParams {
no_translate_past_stop: false,
left_terminal_gaps_free: true,
right_terminal_gaps_free: true,
gap_alignment_side: GapAlignmentSide::Right,
gap_alignment_side: GapAlignmentSide::default(),
excess_bandwidth: 9,
terminal_bandwidth: 50,
min_seed_cover: 0.33,
Expand Down
63 changes: 61 additions & 2 deletions packages_rs/nextclade/src/align/score_matrix.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ pub fn score_matrix<T: Letter<T>>(

trace!("Score matrix: allocated alignment band of size={band_size}");

// The variable `left_align` effectively changes `<` into `<=` in the conditions where it is used,
// in order to select the preferred alignment when there are two equally good possibilities.
let left_align = match params.gap_alignment_side {
GapAlignmentSide::Left => 1,
GapAlignmentSide::Right => 0,
Expand Down Expand Up @@ -267,6 +269,63 @@ mod tests {
],
);

#[rustfmt::skip]
let expected_paths = Band2d::<i8>::with_data(
&stripes,
&[
0, 10, 10, 10,
20, 1, 9, 9, 41,
20, 17, 17, 25, 9,
20, 1, 25, 1, 25, 34, 42,
20, 17, 1, 25, 4, 9, 2, 10,
20, 17, 25, 2, 25, 12, 9, 2,
20, 17, 4, 25, 18, 25, 12, 9,
20, 17, 25, 4, 17, 18, 26, 12,
52, 17, 4, 17, 18, 28,
52, 20, 20, 4, 17, 18,
20, 20, 20, 4, 1,
],
);

assert_eq!(expected_scores, result.scores);
assert_eq!(expected_paths, result.paths);

Ok(())
}

#[rstest]
fn pads_missing_left_with_alignment_gap_right(mut ctx: Context) -> Result<(), Report> {
let qry_seq = to_nuc_seq("CTCGCTG")?;
let ref_seq = to_nuc_seq("ACGCTCGCTG")?;

let band_width = 5;
let mean_shift = 2;

let mut stripes = simple_stripes(mean_shift, band_width, ref_seq.len(), qry_seq.len());
stripes[2].end = stripes[2].end - 1;
stripes[8].begin = stripes[8].begin + 1;

ctx.params.gap_alignment_side = GapAlignmentSide::Right;
let result = score_matrix(&qry_seq, &ref_seq, &ctx.gap_open_close, &stripes, &ctx.params);

#[rustfmt::skip]
let expected_scores = Band2d::<i32>::with_data(
&stripes,
&[
0, 0, 0, 0,
0, -1, -1, -1, -1,
0, 3, -2, 2, -2,
0, -1, 2, -3, 5, -1, -1,
0, 3, -2, 5, -1, 8, 2, 2,
0, -1, 6, 0, 4, 2, 11, 5,
0, 3, 0, 9, 3, 7, 5, 10,
0, -1, 2, 3, 12, 6, 6, 10,
0, 5, 6, 15, 9, 10,
0, 3, 6, 9, 18, 12,
3, 6, 9, 12, 21,
],
);

#[rustfmt::skip]
let expected_paths = Band2d::<i8>::with_data(
&stripes,
Expand All @@ -276,8 +335,8 @@ mod tests {
20, 17, 17, 25, 9,
20, 1, 25, 1, 25, 34, 42,
20, 17, 1, 25, 2, 9, 2, 10,
20, 17, 25, 2, 25, 12, 9, 2,
20, 17, 4, 25, 18, 25, 12, 9,
20, 17, 25, 2, 25, 12, 9, 2,
20, 17, 4, 25, 18, 25, 12, 9,
20, 17, 25, 4, 17, 18, 25, 12,
52, 17, 4, 17, 18, 28,
52, 20, 20, 4, 17, 18,
Expand Down

1 comment on commit 5778fa6

@vercel
Copy link

@vercel vercel bot commented on 5778fa6 Jan 14, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

nextclade – ./

nextclade.vercel.app
nextclade-git-master-nextstrain.vercel.app
nextclade-nextstrain.vercel.app

Please sign in to comment.