@@ -204,6 +204,41 @@ def test_correct_processing_of_two_overlapping_and_one_separate_contig(exact_ali
204
204
assert len (visualizer ().elements ) > len (contigs )
205
205
206
206
207
def test_correct_processing_of_two_overlapping_and_one_separate_contig_2(exact_aligner, visualizer):
    # Scenario: Three contigs built from the reference's A/C/T stretches are
    # expected to be merged into one stitched sequence.
    # NOTE(review): the test name mentions a separate contig, but the
    # assertions below expect exactly one result.

    ref_seq = 'Z' * 5 + 'A' * 100 + 'C' * 100 + 'T' * 100 + 'Y' * 5

    # Every contig is wrapped in 5-base flanks made of letters that do not
    # occur anywhere in the reference.
    named_sequences = (
        ('a', 'N' * 5 + 'A' * 50 + 'C' * 20 + 'H' * 5),
        ('b', 'M' * 5 + 'C' * 50 + 'T' * 20 + 'J' * 5),
        ('c', 'Q' * 5 + 'C' * 20 + 'T' * 50 + 'I' * 5),
    )
    contigs = [
        GenotypedContig(name=contig_name,
                        seq=contig_seq,
                        ref_name='testref',
                        group_ref='testref',
                        ref_seq=ref_seq,
                        match_fraction=0.5,
                        )
        for contig_name, contig_seq in named_sequences
    ]

    stitched = list(stitch_consensus(contigs))
    assert len(stitched) == 1

    # Expected: contig 'a' minus its trailing 'H' flank, then 30 'C' bases,
    # then contig 'c' minus its leading 'Q' flank.
    expected_seq = contigs[0].seq.rstrip('H') + 'C' * 30 + contigs[2].seq.lstrip('Q')
    assert stitched[0].seq == expected_seq

    # The visualizer must have recorded more elements than there are input contigs.
    assert len(visualizer().elements) > len(contigs)
240
+
241
+
207
242
def test_stitching_of_all_overlapping_contigs_into_one_sequence (exact_aligner , visualizer ):
208
243
# Scenario: All contigs have some overlapping parts, resulting in one continuous sequence after stitching.
209
244
@@ -755,6 +790,7 @@ def test_gap_around_small_insertion(exact_aligner, visualizer):
755
790
756
791
results = list (stitch_consensus (contigs ))
757
792
assert len (results ) == 1
793
+ assert results [0 ].seq == "P" * 5 + "A" * 10 + "B" * 20 + "C" * 10 + "Z" * 5
758
794
assert len (visualizer ().elements ) > len (contigs )
759
795
760
796
@@ -782,6 +818,50 @@ def test_gap_around_big_insertion(exact_aligner, visualizer):
782
818
783
819
results = list (stitch_consensus (contigs ))
784
820
assert len (results ) == 1
821
+ assert results [0 ].seq == "P" * 5 + "A" * 10 + "B" * 20 + "C" * 10 + "Z" * 5
822
+ assert len (visualizer ().elements ) > len (contigs )
823
+
824
+
825
def test_stitch_with_insertion(exact_aligner, visualizer):
    # Scenario: A single contig shares the reference's 'A' and 'C' stretches
    # but has 'D' * 6 between them where the reference has 'B' * 20.
    # The stitched output is expected to equal the input contig sequence.

    ref_seq = 'X' * 5 + 'A' * 10 + 'B' * 20 + 'C' * 10 + 'M' * 5
    contig_seq = 'P' * 5 + 'A' * 10 + 'D' * 6 + 'C' * 10 + 'Z' * 5

    only_contig = GenotypedContig(name='a',
                                  seq=contig_seq,
                                  ref_name='testref',
                                  group_ref='testref',
                                  ref_seq=ref_seq,
                                  match_fraction=0.3,
                                  )
    contigs = [only_contig]

    stitched = list(stitch_consensus(contigs))
    assert len(stitched) == 1

    # Same bases as contig_seq, spelled out in full.
    assert stitched[0].seq == "PPPPPAAAAAAAAAADDDDDDCCCCCCCCCCZZZZZ"

    # The visualizer must have recorded more elements than there are input contigs.
    assert len(visualizer().elements) > len(contigs)
844
+
845
+
846
+
847
def test_stitch_cross_alignment(exact_aligner, visualizer):
    # Scenario: A single contig carries its 'C' stretch before its 'A'
    # stretch, the opposite order from the reference.
    # The expected output is the 'A' stretch followed by the 'C' stretch,
    # with none of the contig's 'P', 'D' or 'Z' bases.

    ref_seq = 'X' * 5 + 'A' * 10 + 'B' * 20 + 'C' * 10 + 'M' * 5
    contig_seq = 'P' * 5 + 'C' * 10 + 'D' * 6 + 'A' * 10 + 'Z' * 5

    only_contig = GenotypedContig(name='a',
                                  seq=contig_seq,
                                  ref_name='testref',
                                  group_ref='testref',
                                  ref_seq=ref_seq,
                                  match_fraction=0.3,
                                  )
    contigs = [only_contig]

    stitched = list(stitch_consensus(contigs))
    assert len(stitched) == 1
    assert stitched[0].seq == "AAAAAAAAAACCCCCCCCCC"

    # The visualizer must have recorded more elements than there are input contigs.
    assert len(visualizer().elements) > len(contigs)
786
866
787
867
0 commit comments