@@ -905,25 +905,47 @@ def collect_gaps(root: str, children_names: List[str]):
905
905
yield carved
906
906
907
907
carved_unaligned_parts : Dict [str , List [str ]] = {}
908
- counter = 0
908
+ fake_name_counter = 0
909
909
for root in sorted_roots :
910
910
existing : Set [Tuple [int , int ]] = set ()
911
911
children = final_children_mapping [root ]
912
912
for gap in collect_gaps (root , children ):
913
913
coords = (gap .q_st , gap .q_ei )
914
914
if coords not in existing :
915
915
existing .add (coords )
916
- counter += 1
917
- fake_name = f"u{ counter } "
916
+ fake_name_counter += 1
917
+ fake_name = f"u{ fake_name_counter } "
918
918
if root not in carved_unaligned_parts :
919
919
carved_unaligned_parts [root ] = []
920
920
carved_unaligned_parts [root ].append (fake_name )
921
921
query_position_map [fake_name ] = coords
922
922
923
- name_map = {}
923
+ merged_unaligned_parts : Dict [ str , List [ str ]] = {}
924
924
for i , root in enumerate (sorted_roots ):
925
925
children = final_children_mapping [root ]
926
926
unaligned_children = carved_unaligned_parts .get (root , [])
927
+ todo = children + unaligned_children
928
+ todo = list (sorted (todo , key = lambda name : query_position_map .get (name , (- 1 , - 1 ))))
929
+ current_group = []
930
+ for child_name in todo + [None ]:
931
+ if child_name in unaligned_children :
932
+ coords = query_position_map [child_name ]
933
+ current_group .append (coords )
934
+ elif current_group :
935
+ coords = (min (q_st for q_st , q_ei in current_group ),
936
+ max (q_ei for q_st , q_ei in current_group ))
937
+ fake_name_counter += 1
938
+ fake_name = f"u{ fake_name_counter } "
939
+ query_position_map [fake_name ] = coords
940
+ if root not in merged_unaligned_parts :
941
+ merged_unaligned_parts [root ] = []
942
+ merged_unaligned_parts [root ].append (fake_name )
943
+ current_group = []
944
+
945
+ name_map = {}
946
+ for i , root in enumerate (sorted_roots ):
947
+ children = final_children_mapping [root ]
948
+ unaligned_children = merged_unaligned_parts .get (root , [])
927
949
928
950
name_map [root ] = f"{ i + 1 } "
929
951
@@ -1144,15 +1166,15 @@ def add_section(title: str) -> None:
1144
1166
# Discarded #
1145
1167
#############
1146
1168
1147
- if discarded or carved_unaligned_parts :
1169
+ if discarded or merged_unaligned_parts :
1148
1170
add_section ("discards:" )
1149
1171
for root in sorted_roots :
1150
1172
if contig_map [root ].group_ref != group_ref :
1151
1173
continue
1152
1174
1153
1175
parts_names = final_children_mapping [root ]
1154
1176
parts_names = [name for name in parts_names if name in discarded ]
1155
- unaligned_parts = carved_unaligned_parts .get (root , [])
1177
+ unaligned_parts = merged_unaligned_parts .get (root , [])
1156
1178
for name in sorted (parts_names + unaligned_parts , key = lambda x : name_map [x .name ] if isinstance (x , Contig ) else name_map [x ]):
1157
1179
if name in unaligned_parts :
1158
1180
(q_st , q_ei ) = query_position_map [name ]
0 commit comments