Skip to content

Commit ebe1e9e

Browse files
committed
Cigar tools: swap names of query and reference strips
Also adjust the contig stitcher to the change.
1 parent a86530f commit ebe1e9e

File tree

4 files changed, with 90 additions (+90) and 90 deletions (-90).

micall/core/contig_stitcher.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -97,13 +97,13 @@ def cut_reference(self, cut_point: float) -> Tuple['AlignedContig', 'AlignedCont
9797
return (left, right)
9898

9999

100-
def lstrip_query(self) -> 'AlignedContig':
100+
def lstrip(self) -> 'AlignedContig':
101101
"""
102102
Trims the query sequence of the contig from its beginning up to the start of the
103103
alignment. The CIGAR alignment is also updated to reflect the trimming.
104104
"""
105105

106-
alignment = self.alignment.lstrip_query().lstrip_reference()
106+
alignment = self.alignment.lstrip_reference().lstrip_query()
107107
q_remainder, query = self.cut_query(alignment.q_st - 0.5)
108108
alignment = alignment.translate(0, -1 * alignment.q_st)
109109
result = AlignedContig.make(query, alignment, self.strand)
@@ -114,13 +114,13 @@ def lstrip_query(self) -> 'AlignedContig':
114114
return result
115115

116116

117-
def rstrip_query(self) -> 'AlignedContig':
117+
def rstrip(self) -> 'AlignedContig':
118118
"""
119119
Trims the query sequence of the contig from its end based on the end of the
120120
alignment. The CIGAR alignment is also updated to reflect the trimming.
121121
"""
122122

123-
alignment = self.alignment.rstrip_query().rstrip_reference()
123+
alignment = self.alignment.rstrip_reference().rstrip_query()
124124
query, q_remainder = self.cut_query(alignment.q_ei + 0.5)
125125
result = AlignedContig.make(query, alignment, self.strand)
126126
logger.debug("Doing rstrip of %r resulted in %r, so %s (len %s) became %s (len %s)",
@@ -196,9 +196,9 @@ def combine_contigs(parts: List[AlignedContig]) -> AlignedContig:
196196
stripped_parts = []
197197
for prev_part, part, next_part in sliding_window(parts):
198198
if prev_part is not None:
199-
part = part.lstrip_query()
199+
part = part.lstrip()
200200
if next_part is not None:
201-
part = part.rstrip_query()
201+
part = part.rstrip()
202202
stripped_parts.append(part)
203203

204204
ret = reduce(AlignedContig.munge, stripped_parts)
@@ -288,9 +288,9 @@ def is_out_of_order(name: str) -> bool:
288288
if isinstance(contig, AlignedContig):
289289
name = contig.name
290290
if prev_contig is not None or is_out_of_order(name):
291-
contig = contig.lstrip_query()
291+
contig = contig.lstrip()
292292
if next_contig is not None or is_out_of_order(name):
293-
contig = contig.rstrip_query()
293+
contig = contig.rstrip()
294294

295295
yield contig
296296

@@ -426,10 +426,10 @@ def stitch_2_contigs(left, right):
426426
# Cut in 4 parts.
427427
left_remainder, left_overlap = left.cut_reference(right.alignment.r_st - 0.5)
428428
right_overlap, right_remainder = right.cut_reference(left.alignment.r_ei + 0.5)
429-
left_overlap = left_overlap.rstrip_query().lstrip_query()
430-
right_overlap = right_overlap.lstrip_query().rstrip_query()
431-
left_remainder = left_remainder.rstrip_query()
432-
right_remainder = right_remainder.lstrip_query()
429+
left_overlap = left_overlap.rstrip().lstrip()
430+
right_overlap = right_overlap.lstrip().rstrip()
431+
left_remainder = left_remainder.rstrip()
432+
right_remainder = right_remainder.lstrip()
433433

434434
logger.debug("Stitching %r at %s (len %s) with %r at %s (len %s)."
435435
" The left_overlap %r is at %s (len %s)"
@@ -603,8 +603,8 @@ def try_split(contig):
603603
if covered(contig, gap):
604604
midpoint = gap.r_st + (gap.r_ei - gap.r_st) / 2 + contig.alignment.epsilon
605605
left_part, right_part = contig.cut_reference(midpoint)
606-
left_part = left_part.rstrip_query()
607-
right_part = right_part.lstrip_query()
606+
left_part = left_part.rstrip()
607+
right_part = right_part.lstrip()
608608

609609
contigs.remove(contig)
610610
contigs.append(left_part)

micall/tests/test_cigar_tools.py

Lines changed: 52 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,7 @@ def test_cigar_hit_ref_cut_add_prop_exhaustive(hit, cut_point):
329329
assert left + right == hit
330330

331331

332-
lstrip_reference_cases = [
332+
lstrip_query_cases = [
333333
('9M@1->1', '9M@1->1'),
334334
('5M6D@1->1', '5M6D@1->1'),
335335
('6D5M@1->1', '6D5M@1->1'),
@@ -352,7 +352,7 @@ def test_cigar_hit_ref_cut_add_prop_exhaustive(hit, cut_point):
352352
]
353353

354354

355-
rstrip_reference_cases = [
355+
rstrip_query_cases = [
356356
('9M@1->1', '9M@1->1'),
357357
('5M6D@1->1', '5M6D@1->1'),
358358
('5M6I@1->1', '5M@1->1'),
@@ -375,7 +375,7 @@ def test_cigar_hit_ref_cut_add_prop_exhaustive(hit, cut_point):
375375
]
376376

377377

378-
lstrip_query_cases = [
378+
lstrip_reference_cases = [
379379
('9M@1->1', '9M@1->1'),
380380
('5M6D@1->1', '5M6D@1->1'),
381381
('6D5M@1->1', '5M@1->7'),
@@ -398,7 +398,7 @@ def test_cigar_hit_ref_cut_add_prop_exhaustive(hit, cut_point):
398398
]
399399

400400

401-
rstrip_query_cases = [
401+
rstrip_reference_cases = [
402402
('9M@1->1', '9M@1->1'),
403403
('5M6D@1->1', '5M@1->1'),
404404
('5M6I@1->1', '5M6I@1->1'),
@@ -423,22 +423,22 @@ def test_cigar_hit_ref_cut_add_prop_exhaustive(hit, cut_point):
423423

424424
strip_prop_cases_all = \
425425
[x[0] for x in cigar_hit_ref_cut_cases] + \
426-
[x[0] for x in lstrip_reference_cases] + \
427-
[x[0] for x in rstrip_reference_cases]
426+
[x[0] for x in lstrip_query_cases] + \
427+
[x[0] for x in rstrip_query_cases]
428428

429429

430-
@pytest.mark.parametrize('hit, expected', lstrip_reference_cases)
430+
@pytest.mark.parametrize('hit, expected', lstrip_query_cases)
431431
def test_cigar_hit_lstrip_reference(hit, expected):
432432
hit = parsed_hit(hit)
433433
expected = parsed_hit(expected)
434-
assert expected == hit.lstrip_reference()
434+
assert expected == hit.lstrip_query()
435435

436436

437-
@pytest.mark.parametrize('hit, expected', rstrip_reference_cases)
437+
@pytest.mark.parametrize('hit, expected', rstrip_query_cases)
438438
def test_cigar_hit_rstrip_reference(hit, expected):
439439
hit = parsed_hit(hit)
440440
expected = parsed_hit(expected)
441-
assert expected == hit.rstrip_reference()
441+
assert expected == hit.rstrip_query()
442442

443443

444444

@@ -449,8 +449,8 @@ def test_cigar_hit_reference_strip_combines_with_connect(hit):
449449
for cut_point in range(hit.r_st - 1, hit.r_ei):
450450
left, right = hit.cut_reference(cut_point + hit.epsilon)
451451

452-
left = left.rstrip_reference()
453-
right = right.lstrip_reference()
452+
left = left.rstrip_query()
453+
right = right.lstrip_query()
454454

455455
assert left.connect(right).coordinate_mapping.ref_to_query \
456456
== hit.coordinate_mapping.ref_to_query
@@ -463,8 +463,8 @@ def test_cigar_hit_reference_strip_combines_with_add(hit):
463463
for cut_point in range(hit.r_st - 1, hit.r_ei):
464464
left, right = hit.cut_reference(cut_point + hit.epsilon)
465465

466-
left = left.rstrip_reference()
467-
right = right.lstrip_reference()
466+
left = left.rstrip_query()
467+
right = right.lstrip_query()
468468

469469
if left.touches(right):
470470
assert left + right == hit
@@ -474,53 +474,53 @@ def test_cigar_hit_reference_strip_combines_with_add(hit):
474474
def test_cigar_hit_reference_strip_never_crashes(hit):
475475
hit = parsed_hit(hit)
476476

477-
hit.rstrip_reference().lstrip_reference()
478-
hit.lstrip_reference().rstrip_reference()
479-
hit.lstrip_reference().lstrip_reference()
480-
hit.rstrip_reference().rstrip_reference()
477+
hit.rstrip_query().lstrip_query()
478+
hit.lstrip_query().rstrip_query()
479+
hit.lstrip_query().lstrip_query()
480+
hit.rstrip_query().rstrip_query()
481481

482482

483483
@pytest.mark.parametrize('hit', strip_prop_cases_all)
484484
def test_cigar_hit_reference_strip_is_idempotent(hit):
485485
hit = parsed_hit(hit)
486486

487-
h1 = hit.rstrip_reference()
488-
assert h1 == h1.rstrip_reference() == h1.rstrip_reference().rstrip_reference()
487+
h1 = hit.rstrip_query()
488+
assert h1 == h1.rstrip_query() == h1.rstrip_query().rstrip_query()
489489

490-
h1 = hit.lstrip_reference()
491-
assert h1 == h1.lstrip_reference() == h1.lstrip_reference().lstrip_reference()
490+
h1 = hit.lstrip_query()
491+
assert h1 == h1.lstrip_query() == h1.lstrip_query().lstrip_query()
492492

493-
h1 = hit.lstrip_reference().rstrip_reference()
494-
assert h1 == h1.lstrip_reference() == h1.rstrip_reference()
493+
h1 = hit.lstrip_query().rstrip_query()
494+
assert h1 == h1.lstrip_query() == h1.rstrip_query()
495495

496-
h1 = hit.rstrip_reference().lstrip_reference()
497-
assert h1 == h1.rstrip_reference() == h1.lstrip_reference()
496+
h1 = hit.rstrip_query().lstrip_query()
497+
assert h1 == h1.rstrip_query() == h1.lstrip_query()
498498

499499

500500
@pytest.mark.parametrize('hit', strip_prop_cases_all)
501501
def test_cigar_hit_reference_strips_are_commutative(hit):
502502
hit = parsed_hit(hit)
503503

504504
if len(hit.cigar.coordinate_mapping.ref_to_query) > 0:
505-
assert hit.rstrip_reference().lstrip_reference() \
506-
== hit.lstrip_reference().rstrip_reference()
505+
assert hit.rstrip_query().lstrip_query() \
506+
== hit.lstrip_query().rstrip_query()
507507
else:
508-
assert hit.rstrip_reference().lstrip_reference().cigar \
509-
== hit.lstrip_reference().rstrip_reference().cigar
508+
assert hit.rstrip_query().lstrip_query().cigar \
509+
== hit.lstrip_query().rstrip_query().cigar
510510

511511

512-
@pytest.mark.parametrize('hit, expected', lstrip_query_cases)
512+
@pytest.mark.parametrize('hit, expected', lstrip_reference_cases)
513513
def test_cigar_hit_lstrip_query(hit, expected):
514514
hit = parsed_hit(hit)
515515
expected = parsed_hit(expected)
516-
assert expected == hit.lstrip_query()
516+
assert expected == hit.lstrip_reference()
517517

518518

519-
@pytest.mark.parametrize('hit, expected', rstrip_query_cases)
519+
@pytest.mark.parametrize('hit, expected', rstrip_reference_cases)
520520
def test_cigar_hit_rstrip_query(hit, expected):
521521
hit = parsed_hit(hit)
522522
expected = parsed_hit(expected)
523-
assert expected == hit.rstrip_query()
523+
assert expected == hit.rstrip_reference()
524524

525525

526526
@pytest.mark.parametrize('hit', strip_prop_cases_all)
@@ -530,8 +530,8 @@ def test_cigar_hit_query_strip_combines_with_connect(hit):
530530
for cut_point in range(hit.r_st - 1, hit.r_ei):
531531
left, right = hit.cut_reference(cut_point + hit.epsilon)
532532

533-
left = left.rstrip_query()
534-
right = right.lstrip_query()
533+
left = left.rstrip_reference()
534+
right = right.lstrip_reference()
535535

536536
assert left.connect(right).coordinate_mapping.ref_to_query \
537537
== hit.coordinate_mapping.ref_to_query
@@ -544,8 +544,8 @@ def test_cigar_hit_query_strip_combines_with_add(hit):
544544
for cut_point in range(hit.r_st - 1, hit.r_ei):
545545
left, right = hit.cut_reference(cut_point + hit.epsilon)
546546

547-
left = left.rstrip_query()
548-
right = right.lstrip_query()
547+
left = left.rstrip_reference()
548+
right = right.lstrip_reference()
549549

550550
if left.touches(right):
551551
assert left + right == hit
@@ -555,39 +555,39 @@ def test_cigar_hit_query_strip_combines_with_add(hit):
555555
def test_cigar_hit_strips_work_together(hit):
556556
hit = parsed_hit(hit)
557557

558-
rstrip = str(hit.rstrip_query().rstrip_reference().cigar)
558+
rstrip = str(hit.rstrip_reference().rstrip_query().cigar)
559559
assert not rstrip.endswith("I") and not rstrip.endswith("D")
560-
lstrip = str(hit.lstrip_query().lstrip_reference().cigar)
560+
lstrip = str(hit.lstrip_reference().lstrip_query().cigar)
561561
assert not lstrip.startswith("I") and not lstrip.startswith("D")
562562

563563

564564
@pytest.mark.parametrize('hit', strip_prop_cases_all)
565565
def test_cigar_hit_query_strip_is_idempotent(hit):
566566
hit = parsed_hit(hit)
567567

568-
h1 = hit.rstrip_query()
569-
assert h1 == h1.rstrip_query() == h1.rstrip_query().rstrip_query()
568+
h1 = hit.rstrip_reference()
569+
assert h1 == h1.rstrip_reference() == h1.rstrip_reference().rstrip_reference()
570570

571-
h1 = hit.lstrip_query()
572-
assert h1 == h1.lstrip_query() == h1.lstrip_query().lstrip_query()
571+
h1 = hit.lstrip_reference()
572+
assert h1 == h1.lstrip_reference() == h1.lstrip_reference().lstrip_reference()
573573

574-
h1 = hit.lstrip_query().rstrip_query()
575-
assert h1 == h1.lstrip_query() == h1.rstrip_query()
574+
h1 = hit.lstrip_reference().rstrip_reference()
575+
assert h1 == h1.lstrip_reference() == h1.rstrip_reference()
576576

577-
h1 = hit.rstrip_query().lstrip_query()
578-
assert h1 == h1.rstrip_query() == h1.lstrip_query()
577+
h1 = hit.rstrip_reference().lstrip_reference()
578+
assert h1 == h1.rstrip_reference() == h1.lstrip_reference()
579579

580580

581581
@pytest.mark.parametrize('hit', strip_prop_cases_all)
582582
def test_cigar_hit_query_strips_are_commutative(hit):
583583
hit = parsed_hit(hit)
584584

585585
if len(hit.cigar.coordinate_mapping.ref_to_query) > 0:
586-
assert hit.rstrip_query().lstrip_query() \
587-
== hit.lstrip_query().rstrip_query()
586+
assert hit.rstrip_reference().lstrip_reference() \
587+
== hit.lstrip_reference().rstrip_reference()
588588
else:
589-
assert hit.rstrip_query().lstrip_query().cigar \
590-
== hit.lstrip_query().rstrip_query().cigar
589+
assert hit.rstrip_reference().lstrip_reference().cigar \
590+
== hit.lstrip_reference().rstrip_reference().cigar
591591

592592

593593
@pytest.mark.parametrize('hit, cut_point', [(x[0], x[1]) for x in cigar_hit_ref_cut_cases

micall/tests/test_contig_stitcher.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -543,7 +543,7 @@ def test_stitching_contig_with_small_covered_gap(exact_aligner, visualizer):
543543

544544
assert len(visualizer().elements) > len(contigs)
545545

546-
assert all(x.seq == x.lstrip_query().rstrip_query().seq for x in results)
546+
assert all(x.seq == x.lstrip().rstrip().seq for x in results)
547547
assert { contig.seq for contig in contigs } \
548548
== { contig.seq for contig in results }
549549

@@ -568,10 +568,10 @@ def test_stitching_partial_align(exact_aligner, visualizer):
568568

569569
assert len(visualizer().elements) > len(contigs)
570570

571-
assert all(x.seq != x.lstrip_query().rstrip_query().seq for x in results)
571+
assert all(x.seq != x.lstrip().rstrip().seq for x in results)
572572

573573
assert { contig.seq for contig in contigs } \
574-
!= { contig.lstrip_query().rstrip_query().seq for contig in results }
574+
!= { contig.lstrip().rstrip().seq for contig in results }
575575

576576

577577
def test_partial_align_consensus(exact_aligner, visualizer):
@@ -625,7 +625,7 @@ def test_stitching_partial_align_multiple_sequences(exact_aligner, visualizer):
625625
assert len(visualizer().elements) > len(contigs)
626626

627627
assert { contig.seq for contig in contigs } \
628-
!= { contig.lstrip_query().rstrip_query().seq for contig in results }
628+
!= { contig.lstrip().rstrip().seq for contig in results }
629629

630630

631631
def test_partial_align_consensus_multiple_sequences(exact_aligner, visualizer):

0 commit comments

Comments (0)