Skip to content

Commit 8e90f34

Browse files
committed
Contig stitcher: add missing type signatures
1 parent 834c89b commit 8e90f34

File tree

2 files changed

+57
-58
lines changed

2 files changed

+57
-58
lines changed

micall/core/plot_contigs.py

+56-57
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import typing
2+
from typing import Dict, Tuple, List, Set, Iterable, NoReturn
23
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, FileType
34
from collections import Counter, defaultdict
45
from csv import DictReader
@@ -19,6 +20,7 @@
1920

2021
from micall.core.project_config import ProjectConfig
2122
from micall.utils.alignment_wrapper import align_nucs
23+
from micall.core.contig_stitcher import Contig, GenotypedContig, AlignedContig
2224
import micall.utils.contig_stitcher_events as events
2325

2426

@@ -395,19 +397,14 @@ def build_coverage_figure(genome_coverage_csv, blast_csv=None, use_concordance=F
395397
return f
396398

397399

398-
def plot_stitcher_coverage(logs, genome_coverage_svg_path):
400+
def plot_stitcher_coverage(logs: Iterable[events.EventType], genome_coverage_svg_path: str):
399401
f = build_stitcher_figure(logs)
400402
f.show(w=970).save_svg(genome_coverage_svg_path, context=draw.Context(invert_y=True))
401403
return f
402404

403405

404-
from types import SimpleNamespace
405-
from typing import Union, Dict, Tuple, List, Optional, Set
406-
from micall.core.contig_stitcher import Contig, GenotypedContig, AlignedContig
407-
import random
408-
409-
def build_stitcher_figure(logs) -> None:
410-
contig_map: Dict[str, Contig] = {}
406+
def build_stitcher_figure(logs: Iterable[events.EventType]) -> Figure:
407+
contig_map: Dict[str, GenotypedContig] = {}
411408
name_mappings: Dict[str, str] = {}
412409
parent_graph: Dict[str, List[str]] = {}
413410
morphism_graph: Dict[str, List[str]] = {}
@@ -515,7 +512,7 @@ def graph_sum(graph_a, graph_b):
515512
def symmetric_closure(graph):
516513
return graph_sum(graph, inverse_graph(graph))
517514

518-
def record_contig(contig: Contig, parents: List[Contig]):
515+
def record_contig(contig: GenotypedContig, parents: List[GenotypedContig]):
519516
contig_map[contig.name] = contig
520517
if [contig.name] != [parent.name for parent in parents]:
521518
for parent in parents:
@@ -532,7 +529,7 @@ def record_morphism(contig: Contig, original: Contig):
532529
if contig.name not in lst:
533530
lst.append(contig.name)
534531

535-
def record_bad_contig(contig: Contig, lst: List[Contig]):
532+
def record_bad_contig(contig: GenotypedContig, lst: List[str]):
536533
contig_map[contig.name] = contig
537534
lst.append(contig.name)
538535

@@ -585,11 +582,13 @@ def record_bad_contig(contig: Contig, lst: List[Contig]):
585582
record_contig(event.right, [event.original])
586583
elif isinstance(event, events.Combine):
587584
record_contig(event.result, event.contigs)
588-
combine_left_edge[event.result.name] = event.contigs[0].name
589-
combine_right_edge[event.result.name] = event.contigs[-1].name
585+
if event.contigs:
586+
combine_left_edge[event.result.name] = event.contigs[0].name
587+
combine_right_edge[event.result.name] = event.contigs[-1].name
590588
elif isinstance(event, (events.IgnoreGap, events.NoOverlap)):
591589
pass
592590
else:
591+
x: NoReturn = event
593592
raise RuntimeError(f"Unrecognized action or event: {event}")
594593

595594
group_refs = {contig.group_ref: len(contig.ref_seq) for contig in contig_map.values() if contig.ref_seq}
@@ -610,23 +609,23 @@ def record_bad_contig(contig: Contig, lst: List[Contig]):
610609
eqv_morphism_graph = reflexive_closure(symmetric_closure(transitive_closure(morphism_graph)))
611610
reduced_morphism_graph = reduced_closure(morphism_graph)
612611

613-
for contig in overlaps_list:
614-
temporary.add(contig)
615-
for child in transitive_children_graph.get(contig, []):
612+
for contig_name in overlaps_list:
613+
temporary.add(contig_name)
614+
for child in transitive_children_graph.get(contig_name, []):
616615
temporary.add(child)
617616

618-
for contig, parents in parent_graph.items():
617+
for contig_name, parents in parent_graph.items():
619618
if len(parents) > 2:
620-
children_join_points.append(contig)
621-
for contig, children in children_graph.items():
619+
children_join_points.append(contig_name)
620+
for contig_name, children in children_graph.items():
622621
if len(children) > 2:
623-
children_meet_points.append(contig)
622+
children_meet_points.append(contig_name)
624623

625-
last_join_points_parent = {contig for join in children_join_points for contig in transitive_parent_graph.get(join, [])}
624+
last_join_points_parent = {contig_name for join in children_join_points for contig_name in transitive_parent_graph.get(join, [])}
626625
last_join_points = []
627-
for contig in children_join_points:
628-
if contig not in last_join_points_parent:
629-
last_join_points.append(contig)
626+
for contig_name in children_join_points:
627+
if contig_name not in last_join_points_parent:
628+
last_join_points.append(contig_name)
630629

631630
def set_query_position(contig: Contig):
632631
if contig.name in query_position_map:
@@ -644,7 +643,7 @@ def set_query_position(contig: Contig):
644643
if parent.name not in query_position_map:
645644
set_query_position(parent)
646645

647-
average = sum(query_position_map[parent_name] for parent_name in parent_names) / len(parent_names)
646+
average = round(sum(query_position_map[parent_name] for parent_name in parent_names) / len(parent_names))
648647
query_position_map[contig.name] = average
649648
else:
650649
query_position_map[contig.name] = (contig.alignment.q_st + contig.alignment.q_ei) // 2
@@ -653,9 +652,9 @@ def set_query_position(contig: Contig):
653652
set_query_position(contig)
654653

655654
# Closing `temporary'
656-
for contig in contig_map:
657-
if contig in temporary:
658-
for clone in eqv_morphism_graph.get(contig, [contig]):
655+
for contig_name in contig_map:
656+
if contig_name in temporary:
657+
for clone in eqv_morphism_graph.get(contig_name, [contig_name]):
659658
temporary.add(clone)
660659

661660
def copy_takes_one_side(edge_table, overlap_xtake_map, overlap_xparent_map):
@@ -676,42 +675,42 @@ def copy_takes_one_side(edge_table, overlap_xtake_map, overlap_xparent_map):
676675
while list(copy_takes_one_side(combine_left_edge, overlap_righttake_map, overlap_rightparent_map)): pass
677676

678677
final_parts: Dict[str, bool] = {}
679-
for contig in contig_map:
680-
if contig in temporary:
678+
for contig_name in contig_map:
679+
if contig_name in temporary:
681680
continue
682681

683-
if contig in overlap_sibling_map:
684-
finals = reduced_morphism_graph.get(contig, [contig])
682+
if contig_name in overlap_sibling_map:
683+
finals = reduced_morphism_graph.get(contig_name, [contig_name])
685684
if len(finals) == 1:
686685
[final] = finals
687686
parents = reduced_parent_graph.get(final, [])
688687
if len(parents) == 1:
689688
final_parts[final] = True
690689

691-
elif contig in bad_contigs:
692-
final_parts[contig] = True
690+
elif contig_name in bad_contigs:
691+
final_parts[contig_name] = True
693692

694693
for join in last_join_points + sorted_sinks:
695694
parents = parent_graph.get(join, [join])
696695
if not any(isinstance(contig_map[parent], AlignedContig) for parent in parents):
697696
parents = [join]
698697

699-
for contig in parents:
700-
for contig in reduced_morphism_graph.get(contig, [contig]):
701-
if contig in bad_contigs:
698+
for contig_name in parents:
699+
for contig_name in reduced_morphism_graph.get(contig_name, [contig_name]):
700+
if contig_name in bad_contigs:
702701
continue
703702

704-
if any(contig in transitive_parent_graph.get(bad, []) for bad in bad_contigs):
703+
if any(contig_name in transitive_parent_graph.get(bad, []) for bad in bad_contigs):
705704
continue
706705

707-
if any(eqv in temporary for eqv in eqv_morphism_graph.get(contig, [contig])):
706+
if any(eqv in temporary for eqv in eqv_morphism_graph.get(contig_name, [contig_name])):
708707
continue
709708

710-
transitive_parent = eqv_parent_graph.get(contig, [contig])
709+
transitive_parent = eqv_parent_graph.get(contig_name, [contig_name])
711710
if any(parent in transitive_parent for parent in final_parts):
712711
continue
713712

714-
final_parts[contig] = True
713+
final_parts[contig_name] = True
715714

716715
final_parent_mapping: Dict[str, List[str]] = {}
717716
for parent_name in sorted_roots:
@@ -725,7 +724,7 @@ def copy_takes_one_side(edge_table, overlap_xtake_map, overlap_xparent_map):
725724

726725
min_position, max_position = 1, 1
727726
position_offset = 100
728-
for contig in contig_map.values():
727+
for _, contig in contig_map.items():
729728
if isinstance(contig, GenotypedContig) and contig.ref_seq is not None:
730729
max_position = max(max_position, len(contig.ref_seq) + 3 * position_offset)
731730
else:
@@ -748,8 +747,8 @@ def copy_takes_one_side(edge_table, overlap_xtake_map, overlap_xparent_map):
748747
k += 1
749748
name_mappings[child] = f"{i + 1}.{k + 1}"
750749

751-
for contig, name in name_mappings.items():
752-
logger.debug(f"Contig name {contig!r} is displayed as {name!r}.")
750+
for contig_name, name in name_mappings.items():
751+
logger.debug(f"Contig name {contig_name!r} is displayed as {name!r}.")
753752

754753
def get_neighbours(part, lookup):
755754
for clone in eqv_morphism_graph.get(part.name, [part.name]):
@@ -771,8 +770,8 @@ def get_neighbour(part, lookup):
771770
full_size_map: Dict[str, Tuple[int, int]] = {}
772771

773772
for parent_name in sorted_roots:
774-
parts = final_parent_mapping[parent_name]
775-
parts = [contig_map[part] for part in parts]
773+
parts_names = final_parent_mapping[parent_name]
774+
parts = [contig_map[part] for part in parts_names]
776775

777776
for part in parts:
778777
if not isinstance(part, AlignedContig):
@@ -801,8 +800,8 @@ def get_neighbour(part, lookup):
801800

802801
aligned_size_map[part.name] = (r_st, r_ei)
803802

804-
sibling = ([overlap_sibling_map[name] for name in eqv_morphism_graph.get(part.name, [part.name]) if name in overlap_sibling_map] or [None])[0]
805-
sibling = sibling and contig_map[sibling]
803+
sibling_name = ([overlap_sibling_map[name] for name in eqv_morphism_graph.get(part.name, [part.name]) if name in overlap_sibling_map] or [""])[0]
804+
sibling = sibling_name and contig_map[sibling_name]
806805
prev_part = get_neighbour(sibling, overlap_lefttake_map)
807806
next_part = get_neighbour(sibling, overlap_righttake_map)
808807

@@ -820,7 +819,7 @@ def get_neighbour(part, lookup):
820819

821820
full_size_map[part.name] = (r_st, r_ei)
822821

823-
def get_contig_coordinates(contig):
822+
def get_contig_coordinates(contig: GenotypedContig) -> Tuple[int, int, int, int]:
824823
if isinstance(contig, AlignedContig):
825824
r_st = position_offset + contig.alignment.r_st
826825
r_ei = position_offset + contig.alignment.r_ei
@@ -841,7 +840,7 @@ def get_contig_coordinates(contig):
841840
a_r_ei = f_r_ei
842841
return (a_r_st, a_r_ei, f_r_st, f_r_ei)
843842

844-
def get_tracks(repeatset, group_ref, contig_name):
843+
def get_tracks(repeatset: Set[str], group_ref: str, contig_name: str) -> Iterable[Track]:
845844
parts = final_parent_mapping[contig_name]
846845
for part_name in parts:
847846
part = contig_map[part_name]
@@ -863,7 +862,7 @@ def get_tracks(repeatset, group_ref, contig_name):
863862
(a_r_st, a_r_ei, f_r_st, f_r_ei) = get_contig_coordinates(part)
864863
yield Track(f_r_st, f_r_ei, label=f"{indexes}")
865864

866-
def get_arrows(repeatset, group_ref, contig_name, labels):
865+
def get_arrows(repeatset: Set[str], group_ref: str, contig_name: str, labels: bool) -> Iterable[Arrow]:
867866
parts = final_parent_mapping[contig_name]
868867
for part_name in parts:
869868
part = contig_map[part_name]
@@ -890,8 +889,8 @@ def get_arrows(repeatset, group_ref, contig_name, labels):
890889
h=height,
891890
label=indexes)
892891

893-
def get_all_arrows(group_ref, labels):
894-
repeatset = set()
892+
def get_all_arrows(group_ref: str, labels: bool) -> Iterable[Arrow]:
893+
repeatset: Set[str] = set()
895894
for parent_name in sorted_roots:
896895
yield from get_arrows(repeatset, group_ref, parent_name, labels)
897896

@@ -966,8 +965,8 @@ def get_all_arrows(group_ref, labels):
966965
# Contigs #
967966
###########
968967

969-
repeatset1 = set()
970-
repeatset2 = set()
968+
repeatset1: Set[str] = set()
969+
repeatset2: Set[str] = set()
971970
for parent_name in sorted_roots:
972971
arrows = list(get_arrows(repeatset1, group_ref, parent_name, labels=False))
973972
if arrows:
@@ -992,7 +991,7 @@ def get_all_arrows(group_ref, labels):
992991

993992
contig = contig_map[contig_name]
994993
(r_st, r_ei, f_r_st, f_r_ei) = get_contig_coordinates(contig)
995-
name = name_mappings.get(contig.name, contig.name)
994+
name = name_mappings.get(contig_name, contig_name)
996995
figure.add(Arrow(r_st, r_ei, elevation=-20, h=1))
997996
figure.add(Track(f_r_st, f_r_ei, label=name))
998997

@@ -1021,7 +1020,7 @@ def get_all_arrows(group_ref, labels):
10211020
else:
10221021
colour = "red"
10231022

1024-
name = name_mappings.get(contig.name, contig.name)
1023+
name = name_mappings.get(contig_name, contig_name)
10251024
figure.add(Track(a_r_st, a_r_ei, color=colour, label=name))
10261025

10271026
###########
@@ -1042,7 +1041,7 @@ def get_all_arrows(group_ref, labels):
10421041
r_st = position_offset
10431042
r_ei = position_offset + len(contig.seq)
10441043
colour = "red"
1045-
name = name_mappings.get(contig.name, contig.name)
1044+
name = name_mappings.get(contig_name, contig_name)
10461045
figure.add(Track(r_st, r_ei, color=colour, label=name))
10471046

10481047
if not figure.elements:

micall/utils/contig_stitcher_events.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -140,4 +140,4 @@ class FinalCombine:
140140

141141
AlignmentEvent = Union[NoRef, ZeroHits, StrandConflict, HitNumber, ReverseComplement, Hit]
142142
ModifyEvent = Union[LStrip, RStrip]
143-
EventType = Union[Cut, ModifyEvent, Munge, AlignmentEvent, StitchCut, Overlap, NoOverlap, Stitch, Drop, IgnoreGap, SplitGap, Intro]
143+
EventType = Union[Cut, ModifyEvent, Munge, Combine, AlignmentEvent, StitchCut, Overlap, NoOverlap, Stitch, Drop, IgnoreGap, SplitGap, Intro, FinalCombine]

0 commit comments

Comments
 (0)