Skip to content

Commit 2beb27a

Browse files
committed
Contig stitcher: add few more tests
1 parent cffb352 commit 2beb27a

4 files changed

+253
-0
lines changed
Loading
Loading

Diff for: micall/tests/test_contig_stitcher.py

+80
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,41 @@ def test_correct_processing_of_two_overlapping_and_one_separate_contig(exact_ali
204204
assert len(visualizer().elements) > len(contigs)
205205

206206

207+
def test_correct_processing_of_two_overlapping_and_one_separate_contig_2(exact_aligner, visualizer):
    # Scenario: Three contigs overlap in a chain along the reference and
    # are expected to be stitched into a single consensus sequence.

    ref_seq = 'Z' * 5 + 'A' * 100 + 'C' * 100 + 'T' * 100 + 'Y' * 5

    # Every sequence is flanked by 5 letters that do not occur in ref_seq.
    named_sequences = (
        ('a', 'N' * 5 + 'A' * 50 + 'C' * 20 + 'H' * 5),
        ('b', 'M' * 5 + 'C' * 50 + 'T' * 20 + 'J' * 5),
        ('c', 'Q' * 5 + 'C' * 20 + 'T' * 50 + 'I' * 5),
    )
    contigs = [
        GenotypedContig(name=contig_name,
                        seq=contig_seq,
                        ref_name='testref',
                        group_ref='testref',
                        ref_seq=ref_seq,
                        match_fraction=0.5,
                        )
        for contig_name, contig_seq in named_sequences
    ]

    results = list(stitch_consensus(contigs))
    assert len(results) == 1

    # Expected: contig 'a' without its trailing 'H' flank, 30 more 'C's,
    # then contig 'c' without its leading 'Q' flank.
    first, _, last = contigs
    expected = first.seq.rstrip('H') + 'C' * 30 + last.seq.lstrip('Q')
    assert results[0].seq == expected
    assert len(visualizer().elements) > len(contigs)
240+
241+
207242
def test_stitching_of_all_overlapping_contigs_into_one_sequence(exact_aligner, visualizer):
208243
# Scenario: All contigs have some overlapping parts, resulting in one continuous sequence after stitching.
209244

@@ -755,6 +790,7 @@ def test_gap_around_small_insertion(exact_aligner, visualizer):
755790

756791
results = list(stitch_consensus(contigs))
757792
assert len(results) == 1
793+
assert results[0].seq == "P" * 5 + "A" * 10 + "B" * 20 + "C" * 10 + "Z" * 5
758794
assert len(visualizer().elements) > len(contigs)
759795

760796

@@ -782,6 +818,50 @@ def test_gap_around_big_insertion(exact_aligner, visualizer):
782818

783819
results = list(stitch_consensus(contigs))
784820
assert len(results) == 1
821+
assert results[0].seq == "P" * 5 + "A" * 10 + "B" * 20 + "C" * 10 + "Z" * 5
822+
assert len(visualizer().elements) > len(contigs)
823+
824+
825+
def test_stitch_with_insertion(exact_aligner, visualizer):
    # Scenario: Contig is aligned with multiple hits, and the borders are correctly handled.
    # The contig has six 'D's where the reference has twenty 'B's; the
    # expected output is the contig sequence unchanged.

    ref_seq = 'X' * 5 + 'A' * 10 + 'B' * 20 + 'C' * 10 + 'M' * 5

    contig = GenotypedContig(name='a',
                             seq='P' * 5 + 'A' * 10 + 'D' * 6 + 'C' * 10 + 'Z' * 5,
                             ref_name='testref',
                             group_ref='testref',
                             ref_seq=ref_seq,
                             match_fraction=0.3,
                             )
    contigs = [contig]

    results = list(stitch_consensus(contigs))
    assert len(results) == 1

    expected = "PPPPPAAAAAAAAAADDDDDDCCCCCCCCCCZZZZZ"
    assert results[0].seq == expected
    assert len(visualizer().elements) > len(contigs)
844+
845+
846+
847+
def test_stitch_cross_alignment(exact_aligner, visualizer):
    # Scenario: Single contig is cross-aligned.
    # The contig's 'C' run comes before its 'A' run, the opposite of their
    # order in the reference; the expected output has 'A's first, then 'C's.

    ref_seq = 'X' * 5 + 'A' * 10 + 'B' * 20 + 'C' * 10 + 'M' * 5

    contig = GenotypedContig(name='a',
                             seq='P' * 5 + 'C' * 10 + 'D' * 6 + 'A' * 10 + 'Z' * 5,
                             ref_name='testref',
                             group_ref='testref',
                             ref_seq=ref_seq,
                             match_fraction=0.3,
                             )
    contigs = [contig]

    results = list(stitch_consensus(contigs))
    assert len(results) == 1

    expected = "AAAAAAAAAACCCCCCCCCC"
    assert results[0].seq == expected
    assert len(visualizer().elements) > len(contigs)
786866

787867

0 commit comments

Comments
 (0)