@@ -196,7 +196,7 @@ def combine_contigs(parts: List[AlignedContig]) -> AlignedContig:
196
196
return ret
197
197
198
198
199
- def align_to_reference (contig ) -> Iterable [GenotypedContig ]:
199
+ def align_to_reference (contig : GenotypedContig ) -> Iterable [GenotypedContig ]:
200
200
"""
201
201
Align a single Contig to its reference sequence, producing potentially multiple aligned contigs.
202
202
@@ -212,12 +212,11 @@ def align_to_reference(contig) -> Iterable[GenotypedContig]:
212
212
213
213
aligner = Aligner (seq = contig .ref_seq , preset = 'map-ont' )
214
214
alignments = list (aligner .map (contig .seq ))
215
- hits_array = [(CigarHit (Cigar (x .cigar ),
216
- min (x .r_st , x .r_en - 1 ),
217
- max (x .r_st , x .r_en - 1 ),
218
- min (x .q_st , x .q_en - 1 ),
219
- max (x .q_st , x .q_en - 1 )),
220
- "forward" if x .strand == 1 else "reverse" ) for x in alignments ]
215
+ hits_array : List [Tuple [CigarHit , Literal ["forward" , "reverse" ]]] = \
216
+ [(CigarHit (Cigar (x .cigar ),
217
+ min (x .r_st , x .r_en - 1 ), max (x .r_st , x .r_en - 1 ),
218
+ min (x .q_st , x .q_en - 1 ), max (x .q_st , x .q_en - 1 )),
219
+ "forward" if x .strand == 1 else "reverse" ) for x in alignments ]
221
220
222
221
connected = connect_cigar_hits (list (map (lambda p : p [0 ], hits_array ))) if hits_array else []
223
222
@@ -263,35 +262,47 @@ def align_to_reference(contig) -> Iterable[GenotypedContig]:
263
262
yield part
264
263
265
264
266
- def strip_conflicting_mappings (contigs ) :
265
+ def strip_conflicting_mappings (contigs : Iterable [ GenotypedContig ]) -> Iterable [ GenotypedContig ] :
267
266
contigs = list (contigs )
268
267
names = {contig .name : contig for contig in contigs }
269
- reference_indexes = list (sorted (names .keys (), key = lambda name : names [name ].alignment .r_st if isinstance (names [name ], AlignedContig ) else - 1 ))
270
- query_indexes = list (sorted (names .keys (), key = lambda name : names [name ].alignment .q_st if isinstance (names [name ], AlignedContig ) else - 1 ))
271
268
272
- def is_out_of_order (name ):
273
- return reference_indexes .index (name ) != query_indexes .index (name )
269
+ def get_indexes (name : str ) -> Tuple [int , int ]:
270
+ contig = names [name ]
271
+ if isinstance (contig , AlignedContig ):
272
+ return (contig .alignment .q_st , contig .alignment .r_st )
273
+ else :
274
+ return (- 1 , - 1 )
274
275
275
- sorted_by_query = list (sorted (contigs , key = lambda contig : contig .alignment .q_st if isinstance (contig , AlignedContig ) else - 1 ))
276
+ reference_sorted = list (sorted (names .keys (), key = lambda name : get_indexes (name )[1 ]))
277
+ query_sorted = list (sorted (names .keys (), key = lambda name : get_indexes (name )[0 ]))
278
+
279
+ def is_out_of_order (name : str ) -> bool :
280
+ return reference_sorted .index (name ) != query_sorted .index (name )
276
281
282
+ sorted_by_query = list (sorted (contigs , key = lambda contig : contig .alignment .q_st if isinstance (contig , AlignedContig ) else - 1 ))
277
283
for prev_contig , contig , next_contig in sliding_window (sorted_by_query ):
278
- name = contig .name
279
- if prev_contig is not None or is_out_of_order (name ):
280
- contig = contig .lstrip_query ()
281
- if next_contig is not None or is_out_of_order (name ):
282
- contig = contig .rstrip_query ()
284
+ if isinstance (contig , AlignedContig ):
285
+ name = contig .name
286
+ if prev_contig is not None or is_out_of_order (name ):
287
+ contig = contig .lstrip_query ()
288
+ if next_contig is not None or is_out_of_order (name ):
289
+ contig = contig .rstrip_query ()
290
+
283
291
yield contig
284
292
285
293
286
- def align_all_to_reference (contigs ) :
294
+ def align_all_to_reference (contigs : Iterable [ GenotypedContig ]) -> Iterable [ GenotypedContig ] :
287
295
"""
288
296
Align multiple contigs to their respective reference sequences.
289
297
290
298
Applies align_to_reference to each contig in the given collection,
291
299
flattening the result into a single list.
292
300
"""
293
301
294
- return [contig for parts in map (strip_conflicting_mappings , map (align_to_reference , contigs )) for contig in parts ]
302
+ groups = map (align_to_reference , contigs )
303
+ groups = map (strip_conflicting_mappings , groups )
304
+ for group in groups :
305
+ yield from group
295
306
296
307
297
308
def align_queries (seq1 : str , seq2 : str ) -> Tuple [str , str ]:
@@ -514,7 +525,7 @@ def merge_intervals(intervals: List[Tuple[int, int]]) -> List[Tuple[int, int]]:
514
525
return merged_intervals
515
526
516
527
517
- def find_covered_contig (contigs : List [AlignedContig ]) -> Optional [AlignedContig ]:
528
+ def find_covered_contig (contigs : List [AlignedContig ]) -> Tuple [ Optional [AlignedContig ], List [ AlignedContig ] ]:
518
529
"""
519
530
Find and return the first contig that is completely covered by other contigs.
520
531
@@ -539,7 +550,7 @@ def calculate_cumulative_coverage(contigs) -> List[Tuple[int, int]]:
539
550
for cover_interval in cumulative_coverage ):
540
551
return current , overlaping_contigs
541
552
542
- return None , None
553
+ return None , []
543
554
544
555
545
556
def drop_completely_covered (contigs : List [AlignedContig ]) -> List [AlignedContig ]:
@@ -624,7 +635,7 @@ def try_split(contig):
624
635
return contigs
625
636
626
637
627
- def stitch_contigs (contigs : Iterable [GenotypedContig ]) -> Iterable [AlignedContig ]:
638
+ def stitch_contigs (contigs : Iterable [GenotypedContig ]) -> Iterable [GenotypedContig ]:
628
639
contigs = list (contigs )
629
640
for contig in contigs :
630
641
logger .info ("Introduced contig %r of ref %r, group_ref %r, and length %s." ,
@@ -634,15 +645,11 @@ def stitch_contigs(contigs: Iterable[GenotypedContig]) -> Iterable[AlignedContig
634
645
contig .name , contig .seq , contig .ref_name ,
635
646
contig .group_ref , contig .ref_seq , len (contig .seq ))
636
647
637
- aligned = align_all_to_reference (contigs )
648
+ maybe_aligned = list ( align_all_to_reference (contigs ) )
638
649
639
650
# Contigs that did not align do not need any more processing
640
- yield from (x for x in aligned if not isinstance (x , AlignedContig ))
641
- aligned = [x for x in aligned if isinstance (x , AlignedContig )]
642
-
643
- # Contigs aligned in reverse do not need any more processing
644
- yield from (x for x in aligned if x .strand == "reverse" )
645
- aligned = [x for x in aligned if x .strand == "forward" ]
651
+ yield from (x for x in maybe_aligned if not isinstance (x , AlignedContig ))
652
+ aligned = [x for x in maybe_aligned if isinstance (x , AlignedContig )]
646
653
647
654
aligned = split_contigs_with_gaps (aligned )
648
655
aligned = drop_completely_covered (aligned )
0 commit comments