Skip to content

Commit a943dd8

Browse files
committed
Contig stitcher: add a few more tests
1 parent cffb352 commit a943dd8

5 files changed

+353
-2
lines changed
Loading
Loading
Loading

micall/tests/test_contig_stitcher.py

+110-2
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,41 @@ def test_correct_processing_of_two_overlapping_and_one_separate_contig(exact_ali
204204
assert len(visualizer().elements) > len(contigs)
205205

206206

207+
def test_correct_processing_of_two_overlapping_and_one_separate_contig_2(exact_aligner, visualizer):
    """Stitch three contigs that share one reference and expect a single result.

    The expected sequence is contig 'a' without its trailing 'H' padding,
    followed by 30 'C's, followed by contig 'c' without its leading 'Q'
    padding.

    NOTE(review): the test name says one contig stays separate, yet the
    assertions below require exactly one result -- confirm which is intended.
    """

    ref_seq = 'Z' * 5 + 'A' * 100 + 'C' * 100 + 'T' * 100 + 'Y' * 5

    # (name, sequence) pairs; every contig shares the remaining settings.
    contig_specs = [
        ('a', 'N' * 5 + 'A' * 50 + 'C' * 20 + 'H' * 5),
        ('b', 'M' * 5 + 'C' * 50 + 'T' * 20 + 'J' * 5),
        ('c', 'Q' * 5 + 'C' * 20 + 'T' * 50 + 'I' * 5),
    ]
    contigs = [
        GenotypedContig(name=contig_name,
                        seq=contig_seq,
                        ref_name='testref',
                        group_ref='testref',
                        ref_seq=ref_seq,
                        match_fraction=0.5,
                        )
        for contig_name, contig_seq in contig_specs
    ]

    stitched = list(stitch_consensus(contigs))
    assert len(stitched) == 1

    first_contig, _, last_contig = contigs
    expected_seq = first_contig.seq.rstrip('H') + 'C' * 30 + last_contig.seq.lstrip('Q')
    assert stitched[0].seq == expected_seq

    # The visualizer fixture must have recorded more elements than there were inputs.
    assert len(visualizer().elements) > len(contigs)
240+
241+
207242
def test_stitching_of_all_overlapping_contigs_into_one_sequence(exact_aligner, visualizer):
208243
# Scenario: All contigs have some overlapping parts, resulting in one continuous sequence after stitching.
209244

@@ -745,7 +780,7 @@ def test_gap_around_small_insertion(exact_aligner, visualizer):
745780
match_fraction=0.3,
746781
),
747782
GenotypedContig(name='b',
748-
seq='B' * 20,
783+
seq='Q' * 5 + 'B' * 20 + 'J' * 5,
749784
ref_name='testref',
750785
group_ref='testref',
751786
ref_seq=ref_seq,
@@ -755,6 +790,7 @@ def test_gap_around_small_insertion(exact_aligner, visualizer):
755790

756791
results = list(stitch_consensus(contigs))
757792
assert len(results) == 1
793+
assert results[0].seq == "P" * 5 + "A" * 10 + "B" * 20 + "C" * 10 + "Z" * 5
758794
assert len(visualizer().elements) > len(contigs)
759795

760796

@@ -772,7 +808,78 @@ def test_gap_around_big_insertion(exact_aligner, visualizer):
772808
match_fraction=0.3,
773809
),
774810
GenotypedContig(name='b',
775-
seq='B' * 20,
811+
seq='Q' * 5 + 'B' * 20 + 'J' * 5,
812+
ref_name='testref',
813+
group_ref='testref',
814+
ref_seq=ref_seq,
815+
match_fraction=0.3,
816+
),
817+
]
818+
819+
results = list(stitch_consensus(contigs))
820+
assert len(results) == 1
821+
assert results[0].seq == "P" * 5 + "A" * 10 + "B" * 20 + "C" * 10 + "Z" * 5
822+
assert len(visualizer().elements) > len(contigs)
823+
824+
825+
def test_stitch_with_insertion(exact_aligner, visualizer):
    """Stitch a single contig whose middle differs from the reference.

    The contig has six 'D's where the reference has twenty 'B's, and its
    flanking 'P'/'Z' padding does not occur in the reference. The expected
    output is the contig sequence unchanged.

    Presumably the 'A' and 'C' runs are reported as separate alignment hits
    -- verify against the exact_aligner fixture.
    """

    ref_seq = 'X' * 5 + 'A' * 10 + 'B' * 20 + 'C' * 10 + 'M' * 5

    only_contig = GenotypedContig(name='a',
                                  seq='P' * 5 + 'A' * 10 + 'D' * 6 + 'C' * 10 + 'Z' * 5,
                                  ref_name='testref',
                                  group_ref='testref',
                                  ref_seq=ref_seq,
                                  match_fraction=0.3,
                                  )
    contigs = [only_contig]

    stitched = list(stitch_consensus(contigs))
    assert len(stitched) == 1
    assert stitched[0].seq == "PPPPPAAAAAAAAAADDDDDDCCCCCCCCCCZZZZZ"

    # The visualizer fixture must have recorded more elements than there were inputs.
    assert len(visualizer().elements) > len(contigs)
844+
845+
846+
847+
def test_stitch_cross_alignment(exact_aligner, visualizer):
    """Stitch a single contig whose 'C' and 'A' runs are swapped relative to the reference.

    The reference has the 'A' run before the 'C' run; the contig has the
    'C' run first, then six 'D's, then the 'A' run. The expected output is
    the ten 'A's followed by the ten 'C's, with no 'P'/'D'/'Z' characters.
    """

    ref_seq = 'X' * 5 + 'A' * 10 + 'B' * 20 + 'C' * 10 + 'M' * 5

    only_contig = GenotypedContig(name='a',
                                  seq='P' * 5 + 'C' * 10 + 'D' * 6 + 'A' * 10 + 'Z' * 5,
                                  ref_name='testref',
                                  group_ref='testref',
                                  ref_seq=ref_seq,
                                  match_fraction=0.3,
                                  )
    contigs = [only_contig]

    stitched = list(stitch_consensus(contigs))
    assert len(stitched) == 1
    assert stitched[0].seq == "AAAAAAAAAACCCCCCCCCC"

    # The visualizer fixture must have recorded more elements than there were inputs.
    assert len(visualizer().elements) > len(contigs)
866+
867+
868+
def test_cross_alignment_around_small_insertion(exact_aligner, visualizer):
869+
# Scenario: Single contig is cross-aligned, then combined with another contig that is between its aligned parts.
870+
871+
ref_seq='X' * 5 + 'A' * 10 + 'B' * 20 + 'C' * 10 + 'M' * 5
872+
873+
contigs = [
874+
GenotypedContig(name='a',
875+
seq='P' * 5 + 'C' * 10 + 'D' * 6 + 'A' * 10 + 'Z' * 5,
876+
ref_name='testref',
877+
group_ref='testref',
878+
ref_seq=ref_seq,
879+
match_fraction=0.3,
880+
),
881+
GenotypedContig(name='b',
882+
seq='Q' * 5 + 'B' * 20 + 'J' * 5,
776883
ref_name='testref',
777884
group_ref='testref',
778885
ref_seq=ref_seq,
@@ -782,6 +889,7 @@ def test_gap_around_big_insertion(exact_aligner, visualizer):
782889

783890
results = list(stitch_consensus(contigs))
784891
assert len(results) == 1
892+
assert results[0].seq == "A" * 10 + "B" * 20 + "C" * 10
785893
assert len(visualizer().elements) > len(contigs)
786894

787895

0 commit comments

Comments
 (0)