Skip to content

Commit cffb352

Browse files
committed
Contig stitcher: improve visualization of unaligned parts
1 parent 68aa30c commit cffb352

File tree

1 file changed

+12
-19
lines changed

1 file changed

+12
-19
lines changed

micall/core/plot_contigs.py

+12-19
Original file line numberDiff line numberDiff line change
@@ -516,19 +516,15 @@ def graph_sum(graph_a, graph_b):
516516
def symmetric_closure(graph):
517517
return graph_sum(graph, inverse_graph(graph))
518518

519-
def hits_to_insertions(contig: GenotypedContig, hits: List[CigarHit]):
520-
for hit in hits:
521-
# yield CigarHit.from_default_alignment(q_st=0, q_ei=hit.q_st - 1, r_st=hit.r_st, r_ei=hit.r_st - 1)
522-
yield from hit.insertions()
523-
# yield CigarHit.from_default_alignment(q_st=hit.q_ei + 1, q_ei=len(contig.seq) - 1, r_st=hit.r_ei + 1, r_ei=hit.r_ei)
524-
525-
def record_unaligned_parts(contig: GenotypedContig, connected: List[CigarHit]):
526-
all_insertions = list(hits_to_insertions(contig, connected))
527-
nonempty_insertions = [gap for gap in all_insertions if gap.query_length > 0]
528-
for insertion in nonempty_insertions:
529-
query = dataclasses.replace(contig, name=f"u{len(complete_contig_map)}", seq='A' * insertion.query_length)
519+
def record_unaligned_parts(result: AlignedContig, original: AlignedContig):
520+
length = abs(result.alignment.query_length - original.alignment.query_length)
521+
if length > 0:
522+
q_st = max(result.alignment.q_st, original.alignment.q_st)
523+
r_st = min(result.alignment.r_st, original.alignment.r_st)
524+
insertion = CigarHit.from_default_alignment(q_st=q_st, q_ei=q_st + length - 1, r_st=r_st, r_ei=r_st-1)
525+
query = dataclasses.replace(original, name=f"u{len(complete_contig_map)}", seq='A' * insertion.query_length)
530526
fake_aligned = AlignedContig.make(query=query, alignment=insertion, strand="forward")
531-
record_contig(fake_aligned, [contig])
527+
record_contig(fake_aligned, [original])
532528
record_bad_contig(fake_aligned, unaligned)
533529
record_alive(fake_aligned)
534530

@@ -583,16 +579,17 @@ def record_rstrip(result: GenotypedContig, original: GenotypedContig):
583579
record_contig(event.result, [event.contig])
584580
record_alive(event.result)
585581
elif isinstance(event, events.HitNumber):
586-
record_unaligned_parts(event.contig, event.connected)
587582
record_alive(event.contig)
588583
elif isinstance(event, events.Munge):
589584
record_contig(event.result, [event.left, event.right])
590585
elif isinstance(event, events.LStrip):
591586
record_contig(event.result, [event.original])
592587
record_lstrip(event.result, event.original)
588+
record_unaligned_parts(event.result, event.original)
593589
elif isinstance(event, events.RStrip):
594590
record_contig(event.result, [event.original])
595591
record_rstrip(event.result, event.original)
592+
record_unaligned_parts(event.result, event.original)
596593
elif isinstance(event, events.Overlap):
597594
overlaps_list.append(event.left_overlap.name)
598595
overlaps_list.append(event.right_overlap.name)
@@ -790,12 +787,8 @@ def intervals_overlap(x, y):
790787

791788
todo_names = aligned_names
792789
for contig_name in unaligned_names:
793-
all_other = [contig_map[name] for name in aligned_names]
794-
aligned_other = [contig for contig in all_other if isinstance(contig, AlignedContig)]
795-
current = contig_map[contig_name]
796-
if isinstance(current, AlignedContig) and \
797-
not any(overlaps(current, other) for other in aligned_other):
798-
todo_names.append(contig_name)
790+
todo_names.append(contig_name)
791+
if contig_name not in discarded:
799792
discarded.append(contig_name)
800793

801794
todo_names = list(sorted(todo_names, key=lambda name: query_position_map.get(name, (-1, -1))))

0 commit comments

Comments
 (0)