Skip to content

Commit 2153c12

Browse files
committed
Contig stitcher: improve visualization of unaligned parts
1 parent 68aa30c commit 2153c12

File tree

3 files changed

+22
-23
lines changed

3 files changed

+22
-23
lines changed

Diff for: micall/core/contig_stitcher.py

+5-6
Original file line numberDiff line numberDiff line change
@@ -603,23 +603,22 @@ def try_split(contig):
603603

604604
if covered(contig, gap):
605605
midpoint = gap.r_st + (gap.r_ei - gap.r_st) / 2 + contig.alignment.epsilon
606-
left_part, right_part = contig.cut_reference(midpoint)
607-
left_part = left_part.rstrip()
608-
right_part = right_part.lstrip()
606+
left_cut, right_cut = contig.cut_reference(midpoint)
607+
left_part = left_cut.rstrip()
608+
right_part = right_cut.lstrip()
609609

610610
contigs.remove(contig)
611611
contigs.append(left_part)
612612
contigs.append(right_part)
613613
process_queue.put(right_part)
614614

615615
logger.debug("Split contig %r at %s around its gap at [%s, %s]->[%s, %s]. "
616-
"Left part: %r at %s, "
617-
"right part: %r at %s.",
616+
"Left part: %r at %s, right part: %r at %s.",
618617
contig.name, contig.alignment,
619618
gap.q_st, gap.q_ei, gap.r_st, gap.r_ei,
620619
left_part.name, left_part.alignment,
621620
right_part.name, right_part.alignment)
622-
context.get().emit(events.SplitGap(contig, gap, left_part, right_part))
621+
context.get().emit(events.SplitGap(contig, gap, left_part, right_part, left_cut, right_cut))
623622
return
624623

625624
process_queue: LifoQueue = LifoQueue()

Diff for: micall/core/plot_contigs.py

+15-17
Original file line numberDiff line numberDiff line change
@@ -516,19 +516,15 @@ def graph_sum(graph_a, graph_b):
516516
def symmetric_closure(graph):
517517
return graph_sum(graph, inverse_graph(graph))
518518

519-
def hits_to_insertions(contig: GenotypedContig, hits: List[CigarHit]):
520-
for hit in hits:
521-
# yield CigarHit.from_default_alignment(q_st=0, q_ei=hit.q_st - 1, r_st=hit.r_st, r_ei=hit.r_st - 1)
522-
yield from hit.insertions()
523-
# yield CigarHit.from_default_alignment(q_st=hit.q_ei + 1, q_ei=len(contig.seq) - 1, r_st=hit.r_ei + 1, r_ei=hit.r_ei)
524-
525-
def record_unaligned_parts(contig: GenotypedContig, connected: List[CigarHit]):
526-
all_insertions = list(hits_to_insertions(contig, connected))
527-
nonempty_insertions = [gap for gap in all_insertions if gap.query_length > 0]
528-
for insertion in nonempty_insertions:
529-
query = dataclasses.replace(contig, name=f"u{len(complete_contig_map)}", seq='A' * insertion.query_length)
519+
def record_unaligned_parts(result: AlignedContig, original: AlignedContig):
520+
length = abs(result.alignment.query_length - original.alignment.query_length)
521+
if length > 0:
522+
q_st = max(result.alignment.q_st, original.alignment.q_st)
523+
r_st = min(result.alignment.r_st, original.alignment.r_st)
524+
insertion = CigarHit.from_default_alignment(q_st=q_st, q_ei=q_st + length - 1, r_st=r_st, r_ei=r_st-1)
525+
query = dataclasses.replace(original, name=f"u{len(complete_contig_map)}", seq='A' * insertion.query_length)
530526
fake_aligned = AlignedContig.make(query=query, alignment=insertion, strand="forward")
531-
record_contig(fake_aligned, [contig])
527+
record_contig(fake_aligned, [original])
532528
record_bad_contig(fake_aligned, unaligned)
533529
record_alive(fake_aligned)
534530

@@ -583,16 +579,17 @@ def record_rstrip(result: GenotypedContig, original: GenotypedContig):
583579
record_contig(event.result, [event.contig])
584580
record_alive(event.result)
585581
elif isinstance(event, events.HitNumber):
586-
record_unaligned_parts(event.contig, event.connected)
587582
record_alive(event.contig)
588583
elif isinstance(event, events.Munge):
589584
record_contig(event.result, [event.left, event.right])
590585
elif isinstance(event, events.LStrip):
591586
record_contig(event.result, [event.original])
592587
record_lstrip(event.result, event.original)
588+
record_unaligned_parts(event.result, event.original)
593589
elif isinstance(event, events.RStrip):
594590
record_contig(event.result, [event.original])
595591
record_rstrip(event.result, event.original)
592+
record_unaligned_parts(event.result, event.original)
596593
elif isinstance(event, events.Overlap):
597594
overlaps_list.append(event.left_overlap.name)
598595
overlaps_list.append(event.right_overlap.name)
@@ -793,10 +790,11 @@ def intervals_overlap(x, y):
793790
all_other = [contig_map[name] for name in aligned_names]
794791
aligned_other = [contig for contig in all_other if isinstance(contig, AlignedContig)]
795792
current = contig_map[contig_name]
796-
if isinstance(current, AlignedContig) and \
797-
not any(overlaps(current, other) for other in aligned_other):
798-
todo_names.append(contig_name)
799-
discarded.append(contig_name)
793+
# if isinstance(current, AlignedContig) and \
794+
# not any(overlaps(current, other) for other in aligned_other):
795+
# if any(aligned in parent_graph.get(contig_name, []) for aligned in aligned_names):
796+
todo_names.append(contig_name)
797+
discarded.append(contig_name)
800798

801799
todo_names = list(sorted(todo_names, key=lambda name: query_position_map.get(name, (-1, -1))))
802800
for k, child_name in enumerate(todo_names):

Diff for: micall/utils/contig_stitcher_events.py

+2
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,8 @@ class SplitGap:
127127
gap: 'CigarHit'
128128
left: 'AlignedContig'
129129
right: 'AlignedContig'
130+
left_cut: 'AlignedContig'
131+
right_cut: 'AlignedContig'
130132

131133

132134
@dataclass

0 commit comments

Comments
 (0)