@@ -204,6 +204,41 @@ def test_correct_processing_of_two_overlapping_and_one_separate_contig(exact_ali
204
204
assert len (visualizer ().elements ) > len (contigs )
205
205
206
206
207
def test_correct_processing_of_two_overlapping_and_one_separate_contig_2(exact_aligner, visualizer):
    # Scenario: Three contigs that share one reference are expected to come out of stitching as a single sequence.

    ref_seq = 'Z' * 5 + 'A' * 100 + 'C' * 100 + 'T' * 100 + 'Y' * 5

    # (name, sequence) of each contig; every other field is identical across the three.
    named_seqs = (
        ('a', 'N' * 5 + 'A' * 50 + 'C' * 20 + 'H' * 5),
        ('b', 'M' * 5 + 'C' * 50 + 'T' * 20 + 'J' * 5),
        ('c', 'Q' * 5 + 'C' * 20 + 'T' * 50 + 'I' * 5),
    )
    contigs = [
        GenotypedContig(name=name,
                        seq=seq,
                        ref_name='testref',
                        group_ref='testref',
                        ref_seq=ref_seq,
                        match_fraction=0.5,
                        )
        for name, seq in named_seqs
    ]

    results = list(stitch_consensus(contigs))
    assert len(results) == 1

    # Expected: contig 'a' without its trailing 'H' run, then 30 'C's,
    # then contig 'c' without its leading 'Q' run.
    expected_seq = contigs[0].seq.rstrip('H') + 'C' * 30 + contigs[2].seq.lstrip('Q')
    assert results[0].seq == expected_seq

    # The visualizer fixture must have recorded more elements than there were input contigs.
    assert len(visualizer().elements) > len(contigs)
240
+
241
+
207
242
def test_stitching_of_all_overlapping_contigs_into_one_sequence (exact_aligner , visualizer ):
208
243
# Scenario: All contigs have some overlapping parts, resulting in one continuous sequence after stitching.
209
244
@@ -745,7 +780,7 @@ def test_gap_around_small_insertion(exact_aligner, visualizer):
745
780
match_fraction = 0.3 ,
746
781
),
747
782
GenotypedContig (name = 'b' ,
748
- seq = 'B' * 20 ,
783
+ seq = 'Q' * 5 + 'B' * 20 + 'J' * 5 ,
749
784
ref_name = 'testref' ,
750
785
group_ref = 'testref' ,
751
786
ref_seq = ref_seq ,
@@ -755,6 +790,7 @@ def test_gap_around_small_insertion(exact_aligner, visualizer):
755
790
756
791
results = list (stitch_consensus (contigs ))
757
792
assert len (results ) == 1
793
+ assert results [0 ].seq == "P" * 5 + "A" * 10 + "B" * 20 + "C" * 10 + "Z" * 5
758
794
assert len (visualizer ().elements ) > len (contigs )
759
795
760
796
@@ -772,7 +808,78 @@ def test_gap_around_big_insertion(exact_aligner, visualizer):
772
808
match_fraction = 0.3 ,
773
809
),
774
810
GenotypedContig (name = 'b' ,
775
- seq = 'B' * 20 ,
811
+ seq = 'Q' * 5 + 'B' * 20 + 'J' * 5 ,
812
+ ref_name = 'testref' ,
813
+ group_ref = 'testref' ,
814
+ ref_seq = ref_seq ,
815
+ match_fraction = 0.3 ,
816
+ ),
817
+ ]
818
+
819
+ results = list (stitch_consensus (contigs ))
820
+ assert len (results ) == 1
821
+ assert results [0 ].seq == "P" * 5 + "A" * 10 + "B" * 20 + "C" * 10 + "Z" * 5
822
+ assert len (visualizer ().elements ) > len (contigs )
823
+
824
+
825
def test_stitch_with_insertion(exact_aligner, visualizer):
    # Scenario: Contig is aligned with multiple hits, and the borders are correctly handled.

    ref_seq = 'X' * 5 + 'A' * 10 + 'B' * 20 + 'C' * 10 + 'M' * 5

    # The only contig: it has a run of 6 'D's where the reference has 20 'B's,
    # and its outer 'P'/'Z' runs differ from the reference's 'X'/'M' runs.
    lone_contig = GenotypedContig(
        name='a',
        seq='P' * 5 + 'A' * 10 + 'D' * 6 + 'C' * 10 + 'Z' * 5,
        ref_name='testref',
        group_ref='testref',
        ref_seq=ref_seq,
        match_fraction=0.3,
    )
    contigs = [lone_contig]

    results = list(stitch_consensus(contigs))
    assert len(results) == 1

    # The expected output is character-for-character the input contig sequence.
    expected_seq = "PPPPPAAAAAAAAAADDDDDDCCCCCCCCCCZZZZZ"
    assert results[0].seq == expected_seq

    # The visualizer fixture must have recorded more elements than there were input contigs.
    assert len(visualizer().elements) > len(contigs)
844
+
845
+
846
+
847
def test_stitch_cross_alignment(exact_aligner, visualizer):
    # Scenario: Single contig is cross-aligned.

    ref_seq = 'X' * 5 + 'A' * 10 + 'B' * 20 + 'C' * 10 + 'M' * 5

    # The only contig: its 'C' run comes before its 'A' run,
    # the opposite of their order in the reference.
    lone_contig = GenotypedContig(
        name='a',
        seq='P' * 5 + 'C' * 10 + 'D' * 6 + 'A' * 10 + 'Z' * 5,
        ref_name='testref',
        group_ref='testref',
        ref_seq=ref_seq,
        match_fraction=0.3,
    )
    contigs = [lone_contig]

    results = list(stitch_consensus(contigs))
    assert len(results) == 1

    # Expected output: 10 'A's followed by 10 'C's, with no 'P', 'D' or 'Z' characters.
    expected_seq = "AAAAAAAAAACCCCCCCCCC"
    assert results[0].seq == expected_seq

    # The visualizer fixture must have recorded more elements than there were input contigs.
    assert len(visualizer().elements) > len(contigs)
866
+
867
+
868
+ def test_cross_alignment_around_small_insertion (exact_aligner , visualizer ):
869
+ # Scenario: Single contig is cross-aligned, then combined with another contig that is between its aligned parts.
870
+
871
+ ref_seq = 'X' * 5 + 'A' * 10 + 'B' * 20 + 'C' * 10 + 'M' * 5
872
+
873
+ contigs = [
874
+ GenotypedContig (name = 'a' ,
875
+ seq = 'P' * 5 + 'C' * 10 + 'D' * 6 + 'A' * 10 + 'Z' * 5 ,
876
+ ref_name = 'testref' ,
877
+ group_ref = 'testref' ,
878
+ ref_seq = ref_seq ,
879
+ match_fraction = 0.3 ,
880
+ ),
881
+ GenotypedContig (name = 'b' ,
882
+ seq = 'Q' * 5 + 'B' * 20 + 'J' * 5 ,
776
883
ref_name = 'testref' ,
777
884
group_ref = 'testref' ,
778
885
ref_seq = ref_seq ,
@@ -782,6 +889,7 @@ def test_gap_around_big_insertion(exact_aligner, visualizer):
782
889
783
890
results = list (stitch_consensus (contigs ))
784
891
assert len (results ) == 1
892
+ assert results [0 ].seq == "A" * 10 + "B" * 20 + "C" * 10
785
893
assert len (visualizer ().elements ) > len (contigs )
786
894
787
895
0 commit comments