Skip to content

Commit 2c203d0

Browse files
committed
Contig stitcher: fix unaligned display in cross alignment case
1 parent 88c900e commit 2c203d0

5 files changed

+131
-60
lines changed

micall/core/contig_stitcher.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -287,11 +287,16 @@ def is_out_of_order(name: str) -> bool:
287287
sorted_by_query = sorted(contigs, key=lambda contig: get_indexes(contig.name))
288288
for prev_contig, contig, next_contig in sliding_window(sorted_by_query):
289289
if isinstance(contig, AlignedContig):
290-
name = contig.name
291-
if prev_contig is not None or is_out_of_order(name):
290+
original = contig
291+
start = prev_contig.alignment.q_ei + 1 if isinstance(prev_contig, AlignedContig) else 0
292+
end = next_contig.alignment.q_st - 1 if isinstance(next_contig, AlignedContig) else len(contig.seq) - 1
293+
294+
if prev_contig is not None or is_out_of_order(original.name):
292295
contig = contig.lstrip()
293-
if next_contig is not None or is_out_of_order(name):
296+
context.get().emit(events.InitialStrip(original, start, original.alignment.q_st - 1))
297+
if next_contig is not None or is_out_of_order(original.name):
294298
contig = contig.rstrip()
299+
context.get().emit(events.InitialStrip(original, original.alignment.q_ei + 1, end))
295300

296301
yield contig
297302

micall/core/plot_contigs.py

+28-17
Original file line numberDiff line numberDiff line change
@@ -427,9 +427,9 @@ def build_stitcher_figure(logs: Iterable[events.EventType]) -> Figure:
427427
combine_right_edge: Dict[str, str] = {}
428428
children_join_points: List[str] = []
429429
query_position_map: Dict[str, Tuple[int, int]] = {}
430-
initial_alignments: Dict[str, List[CigarHit]] = {}
431430
lstrip_map: Dict[str, str] = {}
432431
rstrip_map: Dict[str, str] = {}
432+
strip_set: Set[Tuple[str, int, int]] = set()
433433

434434
def remove_intermediate_edges(graph):
435435
ret = {}
@@ -516,18 +516,31 @@ def graph_sum(graph_a, graph_b):
516516
def symmetric_closure(graph):
517517
return graph_sum(graph, inverse_graph(graph))
518518

519-
def record_unaligned_parts(result: AlignedContig, original: AlignedContig):
520-
length = abs(result.alignment.query_length - original.alignment.query_length)
521-
if length > 0:
522-
q_st = original.alignment.q_st
523-
r_st = original.alignment.r_st
524-
insertion = CigarHit.from_default_alignment(q_st=q_st, q_ei=q_st + length - 1, r_st=r_st, r_ei=r_st-1)
525-
query = dataclasses.replace(original, name=f"u{len(complete_contig_map)}", seq='A' * insertion.query_length)
526-
fake_aligned = AlignedContig.make(query=query, alignment=insertion, strand=original.strand)
519+
def record_unaligned_parts(original: AlignedContig, q_st: int, r_st: int, length: int):
520+
key = (original.seq, q_st, q_st + length)
521+
if length > 0 and key not in strip_set:
522+
strip_set.add(key)
523+
alignment = CigarHit.from_default_alignment(q_st=q_st, q_ei=q_st + length - 1, r_st=r_st, r_ei=r_st-1)
524+
seq = 'A' * alignment.query_length
525+
query = dataclasses.replace(original, name=f"u{len(complete_contig_map)}", seq=seq)
526+
fake_aligned = AlignedContig.make(query, alignment, strand=original.strand)
527527
record_contig(fake_aligned, [original])
528528
record_bad_contig(fake_aligned, unaligned)
529529
record_alive(fake_aligned)
530530
return fake_aligned
531+
return None
532+
533+
def record_regular_strip(result: AlignedContig, original: AlignedContig):
534+
length = abs(result.alignment.query_length - original.alignment.query_length)
535+
q_st = original.alignment.q_st
536+
r_st = original.alignment.r_st
537+
return record_unaligned_parts(original, q_st=q_st, r_st=r_st, length=length)
538+
539+
def record_initial_strip(original: AlignedContig, q_st: int, q_ei: int):
540+
length = q_ei - q_st + 1
541+
contig = record_unaligned_parts(original, q_st, original.alignment.r_st, length)
542+
if contig:
543+
query_position_map[contig.name] = (q_st, q_ei)
531544

532545
def record_contig(contig: GenotypedContig, parents: List[GenotypedContig]):
533546
complete_contig_map[contig.name] = contig
@@ -546,17 +559,15 @@ def record_bad_contig(contig: GenotypedContig, lst: List[str]):
546559
complete_contig_map[contig.name] = contig
547560
lst.append(contig.name)
548561

549-
def record_lstrip(result: GenotypedContig, original: GenotypedContig):
562+
def record_lstrip(result: AlignedContig, original: AlignedContig):
550563
lstrip_map[result.name] = original.name
551-
unaligned = record_unaligned_parts(result, original)
552-
assert original.name != result.name
564+
unaligned = record_regular_strip(result, original)
553565
if unaligned:
554566
lstrip_map[unaligned.name] = result.name
555567

556-
def record_rstrip(result: GenotypedContig, original: GenotypedContig):
568+
def record_rstrip(result: AlignedContig, original: AlignedContig):
557569
rstrip_map[result.name] = original.name
558-
unaligned = record_unaligned_parts(result, original)
559-
assert original.name != result.name
570+
unaligned = record_regular_strip(result, original)
560571
if unaligned:
561572
rstrip_map[unaligned.name] = result.name
562573

@@ -597,6 +608,8 @@ def record_rstrip(result: GenotypedContig, original: GenotypedContig):
597608
elif isinstance(event, events.RStrip):
598609
record_contig(event.result, [event.original])
599610
record_rstrip(event.result, event.original)
611+
elif isinstance(event, events.InitialStrip):
612+
record_initial_strip(event.contig, event.q_st, event.q_ei)
600613
elif isinstance(event, events.Overlap):
601614
overlaps_list.append(event.left_overlap.name)
602615
overlaps_list.append(event.right_overlap.name)
@@ -680,11 +693,9 @@ def set_query_position(contig_name: str) -> None:
680693
children_names = children_graph.get(contig.name, [])
681694

682695
def copy_from_parent(contig: AlignedContig, parent_name: str) -> None:
683-
parent = contig_map[parent_name]
684696
if parent_name in query_position_map:
685697
(original_q_st, original_q_ei) = query_position_map[parent_name]
686698
(current_q_st, current_q_ei) = (contig.alignment.q_st, contig.alignment.q_ei)
687-
original_query_len = abs(original_q_st - original_q_ei)
688699
current_query_len = abs(current_q_st - current_q_ei)
689700

690701
if contig_name in lstrip_map:
Loading
Loading

0 commit comments

Comments
 (0)