diff --git a/rescript/plugin_setup.py b/rescript/plugin_setup.py index ad39de7..22a5b73 100644 --- a/rescript/plugin_setup.py +++ b/rescript/plugin_setup.py @@ -1061,6 +1061,7 @@ 'primer_rev': Str, 'position_start': Int % Range(1, None), 'position_end': Int % Range(1, None), + 'keep_primer_location': Bool, 'n_threads': Int % Range(1, None), }, outputs=[('trimmed_sequences', FeatureData[AlignedSequence]), ], @@ -1078,6 +1079,11 @@ 'will end. If not provided, alignment will not be ' 'trimmed at the end. If reverse primer is specified ' 'this parameter will be ignored.', + 'keep_primer_location': 'Retain the alignment positions of the ' + 'primer binding location. Note: the ' + 'primers themselves will be removed, but ' + 'the alignment positions where the primers ' + 'align will be retained in the alignment.', 'n_threads': 'Number of threads to use for primer-based trimming, ' 'otherwise ignored. (Use `auto` to automatically use ' 'all available cores)' @@ -1093,12 +1099,12 @@ "alignment will be generated to locate primer positions. " "Subsequently, start (5'-most) and end (3'-most) position from fwd " "and rev primer located within the new alignment is identified and " - "used for slicing the original alignment. That is, the primer region " - "will be included in the new alignment output. WARNING: finding " - "alignment positions via primer search can be inefficient for very " - "large alignments and is only recomended for small alignments. " - "For large alignments providing specific alignment positions is " - "ideal."), + "used for slicing the original alignment. The retention of alignment " + "positions that span the primer locations can be toggled. " + "WARNING: finding alignment positions via primer search can be " + "inefficient for very large alignments and is only recommended for " + "small alignments. 
For large alignments providing specific alignment " + "positions is ideal."), ) T = TypeMatch([AlignedSequence, Sequence]) diff --git a/rescript/tests/data/small-silva-v4-trim-keeplength-nokeepprimers.fasta b/rescript/tests/data/small-silva-v4-trim-keeplength-nokeepprimers.fasta new file mode 100644 index 0000000..ebddeae --- /dev/null +++ b/rescript/tests/data/small-silva-v4-trim-keeplength-nokeepprimers.fasta @@ -0,0 +1,10 @@ +>AB299544.1.1336 Bacteria;Firmicutes;Clostridia;Oscillospirales;Ruminococcaceae;uncultured;uncultured Clostridiales bacterium +T-------AC---GT-AG-GGA-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GCT-A-G---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------C--G--T---T--GT-C-CGG-AT------TT-A--C-T--GG-GT-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GT--A-----AA-GG-GT-GC-------G-TA-G-G-C-G---------------G--C-TT-G-G-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AA----G-T-C-A-----------------------------------------------------G-A-C--G--TG--A-AA-TT--T-A-CA-G-G----------------------------------------------------------------------------------------------------------------------------------------------CT-T-AA----------------------------------------------------------------------------------------------------------------------------------------------------------------C-C-T-G-T-A-A--A-C----T-G--C-G---T--T----------------------------T--GA-A-A---C----------------------------------------------------T--G-C--TT--G-G-C--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-T-G-A-G-T--G-----G-AG-----
-TA-G-A---------------------G-G-C-A---GA-T-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GG--A--ATT-------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-C-C-A-GT--GT-A-G-CG-GT--G-----------------A--A-A---------------------------------------------------------------------------------------------------TG-C-GT-AG--AT-A-TT-------------------------------G-G------A-A------G-G-A-AC-A-CC------------------------------------------------GG--T--G--GC-GAA-G--G-C----G--------G--T-C-T-G---CTG---------G--GC-T-T----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-A--------A-C-T--GA--CG-----C-----------------------------------------------------------T-G--A-GG--C-A-CG-A--AA-G-C--------------A-TG--GG-T--AG-C-A-AA-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-----------------------------------------------------------------------------------------------------------------------------------------CA--GG- +>JF826529.1.1310 Bacteria;Proteobacteria;Gammaproteobacteria;Burkholderiales;Alcaligenaceae;Achromobacter;Achromobacter sp. NS014 +T-------AC---AT-AG-GGT-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GCA-A-G---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------C--G--T---T--AA-T-CGG-AA------TT-A--C-T--GG-GC--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------GT--A-----AA-GC-GT-GC-------G-CA-G-G-C-G---------------G--T-TC-G-G-A---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AA----G-A-A-A-----------------------------------------------------G-A-T--C--TG--A-AA-TC--C-C-AG-A-C----------------------------------------------------------------------------------------------------------------------------------------------CT-T-AA----------------------------------------------------------------------------------------------------------------------------------------------------------------C-T-T-T-G-G-A--A-C----T-G--C-A---T--T----------------------------T--TT-A-A---C----------------------------------------------------T--A-C--CG--A-C-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-A-G-A-G-T--G-----T-GT------CA-G-A---------------------G-T-G-A---CG-T-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
----GG--A--ATT-------------------------------------------------------------------------------------------------------------------------------------------------------------------------C-C-G-C-GC--GT-A-G-CA-GT--G-----------------A--A-A--------------------------------------------------------------------------------------------------TTG-C-GT-AG--AT-T-TG------------------------------GC-G------G-A------G-G-A-AC-A-CC-----------------------------------------------CGA--T--G--GCGGAA-G--C-C----A--------C--C-C-T-C---CTG---------G--GA-T-A----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AAC--------A-C-T--GA--CG-----C-----------------------------------------------------------T-C--A-TG--C-A-CG-A--AAAG-C--------------G-TG--GG-G--AG-C-A-AA----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CA--GG- +>CP011401.2356993.2358524 Bacteria;Proteobacteria;Gammaproteobacteria;Burkholderiales;Alcaligenaceae;Bordetella;Bordetella pertussis 
+T-------AC---GT-AG-GGT-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GCA-A-G---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------C--G--T---T--AA-T-CGG-AA------TT-A--C-T--GG-GC-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GT--A-----AA-GC-GT-GC-------G-CA-G-G-C-G---------------G--T-TC-G-G-A-------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------AA----G-A-A-A-----------------------------------------------------G-A-T--G--TG--A-AA-TC--C-C-AG-G-G----------------------------------------------------------------------------------------------------------------------------------------------CT-T-AA----------------------------------------------------------------------------------------------------------------------------------------------------------------C-C-T-T-G-G-A--A-C----T-G--C-A---T--T----------------------------T--TT-A-A---C----------------------------------------------------T--A-C--CG--G-G-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-A-G-A-G-T--G-----T-GT------CA-G-A---------------------G-G-G-A---GG-T-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GG--A--ATT-------------------------------------------------------------------------------------------------------------------------------------------------------------------------C-C-G-C-GT--GT-A-G-CA-GT--G-----------------A--A-A-------------------------------------------
--------------------------------------------------------TG-C-GT-AG--AT-A-TG-------------------------------C-G------G-A------G-G-A-AC-A-CC------------------------------------------------GA--T--G--GC-GAA-G--G-C----A--------G--C-C-T-C---CTG---------G--GA-T-A----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------A-C--------A-C-T--GA--CG-----C-----------------------------------------------------------T-C--A-TG--C-A-CG-A--AA-G-T--------------G-TG--GG-G--AG-C-A-AA----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CA--GG- +>HM446088.1.1410 Bacteria;Cyanobacteria;Cyanobacteriia;Synechococcales;Cyanobiaceae;Cyanobium PCC-6307;uncultured bacterium 
+T-------AC---GG-GA-GTG-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GCA-A-G---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------C--G--T---T--AT-C-CGG-AA------TT-A--T-T--GG-GC-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GT--A-----AA-GC-GT-CC-------G-CA-G-G-C-G---------------G--C-CT-T-G-T-------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------AA----G-T-C-G-----------------------------------------------------G-C-T--G--TC--A-AA-GC--G-T-GG-A-G----------------------------------------------------------------------------------------------------------------------------------------------CT-C-AA----------------------------------------------------------------------------------------------------------------------------------------------------------------C-T-C-C-A-T-T--T-C----G-G--C-A---G--T----------------------------G--GA-A-A---C----------------------------------------------------T--A-C--AG--G-G-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-T-G-A-G-T--G-----C-GG------TA-G-G---------------------G-G-C-A---GA-G-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GG--A--ATT-------------------------------------------------------------------------------------------------------------------------------------------------------------------------C-C-C-G-GT--GT-A-G-CG-GT--G-----------------A--A-A-------------------------------------------
--------------------------------------------------------TG-C-GT-AG--AT-A-TC-------------------------------G-G------G-A------A-G-A-AC-A-CC------------------------------------------------AG--T--G--GC-GAA-G--G-C----G--------C--T-C-T-G---CTG---------G--AC-C-A----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-A--------A-C-T--GA--CG-----N-----------------------------------------------------------T-C--A-TG--G-A-CG-A--AA-G-C--------------T-AG--GG-G--AG-C-G-AA----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AG--GG- +>FJ484772.1.1316 Bacteria;Proteobacteria;Gammaproteobacteria;Acidiferrobacterales;Acidiferrobacteraceae;Sulfurifustis;uncultured proteobacterium 
+T-------AC---AG-AG-GGT-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GCA-A-G---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------C--G--T---T--AA-T-CGG-AA------TT-A--C-T--GG-GC-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GT--A-----AA-GC-GT-GT-------G-TA-G-G-T-G---------------G--T-TT-G-T-T-------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------AA----G-T-C-A-----------------------------------------------------G-G-C--G--TG--A-AA-TC--C-C-TG-G-G----------------------------------------------------------------------------------------------------------------------------------------------CT-C-AA----------------------------------------------------------------------------------------------------------------------------------------------------------------C-C-T-G-G-G-A--A-C----T-G--C-G---C--T----------------------------T--GA-T-A---C----------------------------------------------------T--G-G--CA--G-A-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-A-G-A-G-T--G-----T-GG------TA-G-A---------------------G-G-G-T---AG-T-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GG--A--ATT-------------------------------------------------------------------------------------------------------------------------------------------------------------------------C-C-G-C-AT--GT-A-G-CA-GT--G-----------------A--A-A-------------------------------------------
--------------------------------------------------------TG-C-GT-AG--AG-A-TG-------------------------------C-G------G-A------G-G-A-AC-A-TC------------------------------------------------AG--T--G--GC-GAA-G--G-C----G--------G--C-T-A-C---CTG---------G--AC-C-A----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------A-C--------A-C-T--GA--CA-----C-----------------------------------------------------------T-G--A-GG--C-A-CG-A--AA-G-C--------------G-TG--GG-T--AG-C-A-AA----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CA--GG- \ No newline at end of file diff --git a/rescript/tests/test_trim_alignment.py b/rescript/tests/test_trim_alignment.py index bc3bc9d..aba0f06 100644 --- a/rescript/tests/test_trim_alignment.py +++ b/rescript/tests/test_trim_alignment.py @@ -64,6 +64,11 @@ def setUp(self): 'FeatureData[AlignedSequence]', self.silva_alignment) + silva_alignment_v4_trim_keeplen_nokeep_primer_fp = self.get_data_path( + 
'small-silva-v4-trim-keeplength-nokeepprimers.fasta') + self.silva_v4_trim_keeplen_nokeep_primer = AlignedDNAFASTAFormat( + silva_alignment_v4_trim_keeplen_nokeep_primer_fp, mode='r') + silva_alignment_v4_trim_keeplen_wo_primers_fp = self.get_data_path( 'small-silva-v4-trim-keeplength.fasta') self.silva_v4_trim_keeplen = AlignedDNAFASTAFormat( @@ -117,6 +122,17 @@ def setUp(self): 'GGAAGANAGGCCTTCGGGTTGTAAACCGCTTTTGTTCGGGAAGAAATC'), 's5': ('GGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTG' 'TGAAGA-AGGCCTTTTGGTTGTAAAGCACTTTAAGTGGGGAGGAAAAG'), } + self.exp_seqs_both_no_primers = { + 's1': ('GGGAATCTTCCACAATGGGTGCAAACCTGATGGAGCAATGCCGCGTGAG' + 'TGAAGANAGGTCTTCGGATCGTAAAGCTCTGTTGTTAGAGAAGAACAC'), + 's2': ('GGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGGG' + 'GGATGA-CGGCCTTCGGGTTGTAAACTCCTTTCGCCAGGGACGAAGCG'), + 's3': ('GGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTG' + 'TGAAGA-AGGCCTTTTGGTTGTAAAGCACTTTAAGTGGGGAGGAAAAG'), + 's4': ('GGGAATTTTGGACAATGGGGGCAACCCTGATCCAGCCATGCCGCGTGCG' + 'GGAAGANAGGCCTTCGGGTTGTAAACCGCTTTTGTTCGGGAAGAAATC'), + 's5': ('GGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTG' + 'TGAAGA-AGGCCTTTTGGTTGTAAAGCACTTTAAGTGGGGAGGAAAAG'), } self.exp_seqs_only_fwd = { 's1': ('GGGAATCTTCCACAATGGGTGCAAACCTGATGGAGCAATGCCGCGTGAG' 'TGAAGANAGGTCTTCGGATCGTAAAGCTCTGTTGTTAGAGAAGAACACG' @@ -214,22 +230,26 @@ def test_process_primers_only_rev(self): self.assertDictEqual(obs_primers, exp_primers) def test_locate_positions(self): - obs_pos = _locate_primer_positions(self.aligned_with_primers_fasta) + obs_pos = _locate_primer_positions(self.aligned_with_primers_fasta, + keep_primer_location=True) exp_pos = {"start": 7, "end": 104} self.assertDictEqual(obs_pos, exp_pos) def test_locate_positions_only_fwd(self): - obs_pos = _locate_primer_positions(self.aligned_with_fwd_fasta) + obs_pos = _locate_primer_positions(self.aligned_with_fwd_fasta, + keep_primer_location=True) exp_pos = {"start": 7, "end": None} self.assertDictEqual(obs_pos, exp_pos) def 
test_locate_positions_only_rev(self): - obs_pos = _locate_primer_positions(self.aligned_with_rev_fasta) + obs_pos = _locate_primer_positions(self.aligned_with_rev_fasta, + keep_primer_location=True) exp_pos = {"start": None, "end": 104} self.assertDictEqual(obs_pos, exp_pos) def test_locate_positions_no_primers(self): - obs_pos = _locate_primer_positions(self.aligned_seqs_fasta) + obs_pos = _locate_primer_positions(self.aligned_seqs_fasta, + keep_primer_location=True) exp_pos = {"start": None, "end": None} self.assertDictEqual(obs_pos, exp_pos) @@ -241,7 +261,8 @@ def test_locate_positions_no_primers(self): def test_locate_positions_strange_alignment(self): with self.assertRaisesRegex( ValueError, 'Reverse primer overlaps'): - _locate_primer_positions(self.aligned_mess_fasta) + _locate_primer_positions(self.aligned_mess_fasta, + keep_primer_location=True) # test trimming with both, start and end, positions given def test_trim_all_sequences(self): @@ -278,7 +299,8 @@ def test_trim_alignment_keeplen_false(self): self.aligned_silva_seqs_art, self.v4_primers_dict["forward"], self.v4_primers_dict["reverse"], - keeplength=False) + keeplength=False, + keep_primer_location=True) obs_aln = skbio.io.read(str(obs_v4_nokeep_aln), into=skbio.TabularMSA, constructor=skbio.DNA) @@ -295,7 +317,8 @@ def test_trim_alignment_keeplen_true(self): self.aligned_silva_seqs_art, self.v4_primers_dict["forward"], self.v4_primers_dict["reverse"], - keeplength=True) + keeplength=True, + keep_primer_location=True) obs_aln = skbio.io.read(str(obs_v4_keep_aln), into=skbio.TabularMSA, constructor=skbio.DNA) @@ -304,13 +327,35 @@ def test_trim_alignment_keeplen_true(self): constructor=skbio.DNA) self.assertEqual(obs_aln, exp_aln) + # test trimming when both primers are given and keeplength = True + # tests against expected alignment length w/o returning primer + # sequence in the output. 
+ def test_trim_alignment_keeplen_true_no_keep_primers_false(self): + obs_v4_keep_nokeep_primer_aln = _trim_alignment( + mafft_add, + self.aligned_silva_seqs_art, + self.v4_primers_dict["forward"], + self.v4_primers_dict["reverse"], + keeplength=True, + keep_primer_location=False) + + obs_aln = skbio.io.read(str(obs_v4_keep_nokeep_primer_aln), + into=skbio.TabularMSA, + constructor=skbio.DNA) + exp_aln = skbio.io.read(str(self.silva_v4_trim_keeplen_nokeep_primer), + into=skbio.TabularMSA, + constructor=skbio.DNA) + print(obs_aln, '\n', exp_aln) + self.assertEqual(obs_aln, exp_aln) + # test trimming when only fwd primer is given def test_trim_alignment_only_fwd(self): obs = _trim_alignment( self.fake_ctx.get_action(2), self.aligned_seqs_fasta, self.primers_dict["forward"], - None) + None, + keep_primer_location=True) obs_seqs = {seq.metadata['id']: str(seq) for seq in obs.view(DNAIterator)} self.assertDictEqual(obs_seqs, self.exp_seqs_only_fwd) @@ -321,7 +366,8 @@ def test_trim_alignment_only_rev(self): self.fake_ctx.get_action(3), self.aligned_seqs_fasta, None, - self.primers_dict["reverse"]) + self.primers_dict["reverse"], + keep_primer_location=True) obs_seqs = {seq.metadata['id']: str(seq) for seq in obs.view(DNAIterator)} self.assertDictEqual(obs_seqs, self.exp_seqs_only_rev) @@ -331,7 +377,8 @@ def test_trim_alignment_by_positions(self): obs = _trim_alignment( self.fake_ctx.get_action(1), self.aligned_seqs_fasta, - None, None, 8, 104) + None, None, 8, 104, + keep_primer_location=True) obs_seqs = {seq.metadata['id']: str(seq) for seq in obs.view(DNAIterator)} self.assertDictEqual(obs_seqs, self.exp_seqs_both_primers) @@ -341,7 +388,8 @@ def test_trim_alignment_by_position_left(self): obs = _trim_alignment( self.fake_ctx.get_action(2), self.aligned_seqs_fasta, - None, None, 8, None) + None, None, 8, None, + keep_primer_location=True) obs_seqs = {seq.metadata['id']: str(seq) for seq in obs.view(DNAIterator)} self.assertDictEqual(obs_seqs, self.exp_seqs_only_fwd) 
@@ -351,7 +399,8 @@ def test_trim_alignment_by_position_right(self): obs = _trim_alignment( self.fake_ctx.get_action(3), self.aligned_seqs_fasta, - None, None, None, 104) + None, None, None, 104, + keep_primer_location=True) obs_seqs = {seq.metadata['id']: str(seq) for seq in obs.view(DNAIterator)} self.assertDictEqual(obs_seqs, self.exp_seqs_only_rev) diff --git a/rescript/trim_alignment.py b/rescript/trim_alignment.py index f90d791..db58442 100644 --- a/rescript/trim_alignment.py +++ b/rescript/trim_alignment.py @@ -45,7 +45,8 @@ def _trim_all_sequences(aligned_sequences: AlignedDNAFASTAFormat, return result -def _find_terminal_positions(primer_positions: dict) -> (int, int): +def _find_terminal_positions(primer_positions: dict, + keep_primer_location=False) -> (int, int): """ Identify left- (5') and rightmost (3') trimming position. If both primers were used return: (start index of the forward primer, end index of @@ -69,19 +70,30 @@ def _find_terminal_positions(primer_positions: dict) -> (int, int): primer_positions["forward"]["end"]: raise ValueError("Reverse primer overlaps or aligned upstream the " "forward primer. 
Are the primers correct?") - pos_start = min([x["start"] for x in primer_positions.values()]) - pos_end = max([x["end"] for x in primer_positions.values()]) + if keep_primer_location: + pos_start = min([x["start"] for x in primer_positions.values()]) + pos_end = max([x["end"] for x in primer_positions.values()]) + else: + pos_start = primer_positions["forward"]["end"] + pos_end = primer_positions["reverse"]["start"] # when only fwd primer was used elif "forward" in primer_positions.keys(): - pos_start = primer_positions["forward"]["start"] + if keep_primer_location: + pos_start = primer_positions["forward"]["start"] + else: + pos_start = primer_positions["forward"]["end"] # when only rev primer was used elif "reverse" in primer_positions.keys(): - pos_end = primer_positions["reverse"]["end"] + if keep_primer_location: + pos_end = primer_positions["reverse"]["end"] + else: + pos_end = primer_positions["reverse"]["start"] return pos_start, pos_end def _locate_primer_positions( - alignment_with_primers: AlignedDNAFASTAFormat) -> dict: + alignment_with_primers: AlignedDNAFASTAFormat, + keep_primer_location) -> dict: """ Identify position of each primer within the alignment. 
@@ -108,7 +120,8 @@ def _locate_primer_positions( (i for i, nt in enumerate(primer_seq[::-1]) if nt != "-")) } - pos_start, pos_end = _find_terminal_positions(primer_positions) + pos_start, pos_end = _find_terminal_positions(primer_positions, + keep_primer_location) # not doing any validation like in _prepare_positions since none of # the conditions checked there are possible to run into with the way @@ -191,6 +204,7 @@ def _trim_alignment(expand_alignment_action, primer_rev=None, position_start=None, position_end=None, + keep_primer_location=False, n_threads=1, keeplength=True) -> AlignedDNAFASTAFormat: """ @@ -236,7 +250,8 @@ def _trim_alignment(expand_alignment_action, keeplength=keeplength) # find trim positions based on primer positions within alignment - trim_positions = _locate_primer_positions(alignment_with_primers) + trim_positions = _locate_primer_positions(alignment_with_primers, + keep_primer_location) else: # find length of the alignment seq_iter = aligned_sequences.view(AlignedDNAIterator).generator @@ -256,6 +271,7 @@ def trim_alignment(ctx, primer_rev=None, position_start=None, position_end=None, + keep_primer_location=False, n_threads=1): """ Trim an existing alignment based on provided primers or specific, @@ -276,6 +292,7 @@ def trim_alignment(ctx, primer_rev, position_start, position_end, + keep_primer_location=keep_primer_location, n_threads=n_threads) return qiime2.Artifact.import_data('FeatureData[AlignedSequence]', result)