@@ -79,6 +79,11 @@ def trim(segment, primer_pos, end, verbose=False):
7979 vernose : bool
8080 If True, will print soft masking info during trimming
8181 """
82+ if verbose :
83+ print (
84+ f"{ segment .query_name } : Trimming { 'end' if end else 'start' } of read to primer position { primer_pos } " ,
85+ file = sys .stderr ,
86+ )
8287 # get a copy of the cigar tuples to work with
8388 cigar = copy (segment .cigartuples )
8489
@@ -99,11 +104,14 @@ def trim(segment, primer_pos, end, verbose=False):
99104 else :
100105 flag , length = cigar .pop (0 )
101106 if verbose :
102- print ("Chomped a %s, %s" % (flag , length ), file = sys .stderr )
107+ print (
108+ f"{ segment .query_name } : Chomped a { flag } , { length } " ,
109+ file = sys .stderr ,
110+ )
103111 except IndexError :
104112 if verbose :
105113 print (
106- " Ran out of cigar during soft masking - completely masked read will be ignored" ,
114+ f" { segment . query_name } : Ran out of cigar during soft masking - completely masked read will be ignored" ,
107115 file = sys .stderr ,
108116 )
109117 break
@@ -128,10 +136,13 @@ def trim(segment, primer_pos, end, verbose=False):
128136 # calculate how many extra matches are needed in the CIGAR
129137 extra = abs (pos - primer_pos )
130138 if verbose :
131- print (" extra %s" % ( extra ) , file = sys .stderr )
139+ print (f" { segment . query_name } : extra { extra } " , file = sys .stderr )
132140 if extra :
133141 if verbose :
134- print ("Inserted a %s, %s" % (0 , extra ), file = sys .stderr )
142+ print (
143+ f"{ segment .query_name } : Inserted a 0, { extra } " ,
144+ file = sys .stderr ,
145+ )
135146 if end :
136147 cigar .append ((0 , extra ))
137148 else :
@@ -144,13 +155,16 @@ def trim(segment, primer_pos, end, verbose=False):
144155 # update the position of the leftmost mappinng base
145156 segment .pos = pos - extra
146157 if verbose :
147- print ("New pos: %s" % (segment .pos ), file = sys .stderr )
158+ print (
159+ f"{ segment .query_name } : New pos - { segment .pos } " ,
160+ file = sys .stderr ,
161+ )
148162
149163 # if proposed softmask leads straight into a deletion, shuffle leftmost mapping base along and ignore the deletion
150164 if cigar [0 ][0 ] == 2 :
151165 if verbose :
152166 print (
153- " softmask created a leading deletion in the CIGAR, shuffling the alignment" ,
167+ f" { segment . query_name } : softmask created a leading deletion in the CIGAR, shuffling the alignment" ,
154168 file = sys .stderr ,
155169 )
156170 while 1 :
@@ -168,7 +182,13 @@ def trim(segment, primer_pos, end, verbose=False):
168182
169183 # check the new CIGAR and replace the old one
170184 if cigar [0 ][1 ] <= 0 or cigar [- 1 ][1 ] <= 0 :
171- raise ("invalid cigar operation created - possibly due to INDEL in primer" )
185+ if verbose :
186+ print (
187+ f"{ segment .query_name } : invalid cigar operation created - possibly due to INDEL in primer" ,
188+ file = sys .stderr ,
189+ )
190+ return
191+
172192 segment .cigartuples = cigar
173193 return
174194
@@ -195,16 +215,22 @@ def handle_segment(
195215 # filter out unmapped and supplementary alignment segments
196216 if segment .is_unmapped :
197217 if args .verbose :
198- print ("%s skipped as unmapped" % (segment .query_name ), file = sys .stderr )
218+ print (
219+ f"{ segment .query_name } : { segment .query_name } skipped as unmapped" ,
220+ file = sys .stderr ,
221+ )
199222 return False
200223 if segment .is_supplementary :
201224 if args .verbose :
202- print ("%s skipped as supplementary" % (segment .query_name ), file = sys .stderr )
225+ print (
226+ f"{ segment .query_name } : { segment .query_name } skipped as supplementary" ,
227+ file = sys .stderr ,
228+ )
203229 return False
204230 if segment .mapping_quality < min_mapq :
205231 if args .verbose :
206232 print (
207- "%s skipped as mapping quality below threshold" % ( segment . query_name ) ,
233+ f" { segment . query_name } : skipped as mapping quality below threshold" ,
208234 file = sys .stderr ,
209235 )
210236 return False
@@ -230,7 +256,7 @@ def handle_segment(
230256 if not p1 or not p2 :
231257 if args .verbose :
232258 print (
233- "%s skipped as no primer found for segment" % ( segment . query_name ) ,
259+ f" { segment . query_name } : skipped as no primer found for segment" ,
234260 file = sys .stderr ,
235261 )
236262 return False
@@ -260,9 +286,9 @@ def handle_segment(
260286 "ReferenceEnd" : segment .reference_end ,
261287 "PrimerPair" : f"{ p1 [2 ]['Primer_ID' ]} _{ p2 [2 ]['Primer_ID' ]} " ,
262288 "Primer1" : p1 [2 ]["Primer_ID" ],
263- "Primer1Start" : abs ( p1 [1 ]) ,
289+ "Primer1Start" : p1 [2 ][ "start" ] ,
264290 "Primer2" : p2 [2 ]["Primer_ID" ],
265- "Primer2Start" : abs ( p2 [1 ]) ,
291+ "Primer2Start" : p2 [2 ][ "start" ] ,
266292 "IsSecondary" : segment .is_secondary ,
267293 "IsSupplementary" : segment .is_supplementary ,
268294 "Start" : p1 [2 ]["start" ],
@@ -275,16 +301,11 @@ def handle_segment(
275301 if args .remove_incorrect_pairs and not correctly_paired :
276302 if args .verbose :
277303 print (
278- "%s skipped as not correctly paired" % ( segment . query_name ) ,
304+ f" { segment . query_name } : skipped as not correctly paired" ,
279305 file = sys .stderr ,
280306 )
281307 return False
282308
283- if args .verbose :
284- # Dont screw with the order of the dict
285- report_str = "\t " .join (str (x ) for x in report .values ())
286- print (report_str , file = sys .stderr )
287-
288309 # get the primer positions
289310 if args .trim_primers :
290311 p1_position = p1 [2 ]["end" ]
@@ -299,15 +320,12 @@ def handle_segment(
299320 trim (segment , p1_position , False , args .verbose )
300321 if args .verbose :
301322 print (
302- "ref start %s >= primer_position %s"
303- % (segment .reference_start , p1_position ),
323+ f"{ segment .query_name } : ref start { segment .reference_start } >= primer_position { p1_position } " ,
304324 file = sys .stderr ,
305325 )
306326 except Exception as e :
307327 print (
308- "problem soft masking left primer in {} (error: {}), skipping" .format (
309- segment .query_name , e
310- ),
328+ f"{ segment .query_name } : problem soft masking left primer (error: { e } ), skipping" ,
311329 file = sys .stderr ,
312330 )
313331 return False
@@ -318,15 +336,12 @@ def handle_segment(
318336 trim (segment , p2_position , True , args .verbose )
319337 if args .verbose :
320338 print (
321- "ref start %s >= primer_position %s"
322- % (segment .reference_start , p2_position ),
339+ f"{ segment .query_name } : ref start { segment .reference_start } >= primer_position { p2_position } " ,
323340 file = sys .stderr ,
324341 )
325342 except Exception as e :
326343 print (
327- "problem soft masking right primer in {} (error: {}), skipping" .format (
328- segment .query_name , e
329- ),
344+ f"{ segment .query_name } : problem soft masking right primer (error: { e } ), skipping" ,
330345 file = sys .stderr ,
331346 )
332347 return False
@@ -335,8 +350,7 @@ def handle_segment(
335350 if "M" not in segment .cigarstring :
336351 if args .verbose :
337352 print (
338- "%s dropped as does not match reference post masking"
339- % (segment .query_name ),
353+ f"{ segment .query_name } : dropped as does not match reference post masking" ,
340354 file = sys .stderr ,
341355 )
342356 return False
@@ -366,9 +380,10 @@ def handle_paired_segment(
366380 segment1 , segment2 = segments
367381
368382 if not segment1 or not segment2 :
383+ segment = segment1 if segment1 else segment2
369384 if args .verbose :
370385 print (
371- "Segment pair skipped as at least one segment in pair does not exist" ,
386+ f" { segment . query_name } : Pair skipped as at least one segment in pair does not exist" ,
372387 file = sys .stderr ,
373388 )
374389 return False
@@ -377,24 +392,23 @@ def handle_paired_segment(
377392 if segment1 .is_unmapped or segment2 .is_unmapped :
378393 if args .verbose :
379394 print (
380- " Segment pair: %s skipped as unmapped" % ( segment1 . query_name ) ,
395+ f" { segment1 . query_name } : Segment pair skipped as one of pair is unmapped" ,
381396 file = sys .stderr ,
382397 )
383398 return False
384399
385400 if segment1 .is_supplementary or segment2 .is_supplementary :
386401 if args .verbose :
387402 print (
388- "Segment pair: %s skipped as supplementary" % ( segment1 . query_name ) ,
403+ f" { segment1 . query_name } : Pair skipped as at least one of pair is supplementary" ,
389404 file = sys .stderr ,
390405 )
391406 return False
392407
393408 if segment1 .mapping_quality < min_mapq or segment2 .mapping_quality < min_mapq :
394409 if args .verbose :
395410 print (
396- "Segment pair: %s skipped as mapping quality below threshold"
397- % (segment1 .query_name ),
411+ f"{ segment1 .query_name } : Pair skipped as at least one mapping quality of pair is below threshold" ,
398412 file = sys .stderr ,
399413 )
400414 return False
@@ -418,8 +432,7 @@ def handle_paired_segment(
418432 if not p1 or not p2 :
419433 if args .verbose :
420434 print (
421- "Paired segment: %s skipped as no primer found for segment"
422- % (segment1 .query_name ),
435+ f"{ segment1 .query_name } : Pair skipped as no primer found for at least one read in pair" ,
423436 file = sys .stderr ,
424437 )
425438 return False
@@ -451,9 +464,9 @@ def handle_paired_segment(
451464 "ReferenceEnd" : segment2 .reference_end ,
452465 "PrimerPair" : f"{ p1 [2 ]['Primer_ID' ]} _{ p2 [2 ]['Primer_ID' ]} " ,
453466 "Primer1" : p1 [2 ]["Primer_ID" ],
454- "Primer1Start" : abs ( p1 [1 ]) ,
467+ "Primer1Start" : p1 [2 ][ "start" ] ,
455468 "Primer2" : p2 [2 ]["Primer_ID" ],
456- "Primer2Start" : abs ( p2 [1 ]) ,
469+ "Primer2Start" : p2 [2 ][ "start" ] ,
457470 "IsSecondary" : segment1 .is_secondary ,
458471 "IsSupplementary" : segment1 .is_supplementary ,
459472 "Start" : p1 [2 ]["start" ],
@@ -466,17 +479,11 @@ def handle_paired_segment(
466479 if args .remove_incorrect_pairs and not correctly_paired :
467480 if args .verbose :
468481 print (
469- "Paired segment: %s skipped as not correctly paired"
470- % (segment1 .query_name ),
482+ f"{ segment1 .query_name } : Pair skipped due to primers not being correctly paired between reads of pair" ,
471483 file = sys .stderr ,
472484 )
473485 return False
474486
475- if args .verbose :
476- # Dont screw with the order of the dict
477- report_str = "\t " .join (str (x ) for x in report .values ())
478- print (report_str , file = sys .stderr )
479-
480487 # get the primer positions
481488 if args .trim_primers :
482489 p1_position = p1 [2 ]["end" ]
@@ -491,15 +498,27 @@ def handle_paired_segment(
491498 trim (segment1 , p1_position , False , args .verbose )
492499 if args .verbose :
493500 print (
494- "ref start %s >= primer_position %s"
495- % (segment1 .reference_start , p1_position ),
501+ f"{ segment1 .query_name } : ref start { segment1 .reference_start } >= primer_position { p1_position } " ,
496502 file = sys .stderr ,
497503 )
498504 except Exception as e :
499505 print (
500- "problem soft masking left primer in {} (error: {}), skipping" .format (
501- segment1 .query_name , e
502- ),
506+ f"{ segment1 .query_name } : Problem soft masking left primer (error: { e } ), skipping" ,
507+ file = sys .stderr ,
508+ )
509+ return False
510+
511+ elif segment1 .reference_end > p2_position :
512+ try :
513+ trim (segment1 , p2_position , True , args .verbose )
514+ if args .verbose :
515+ print (
516+ f"{ segment1 .query_name } : ref_end { segment1 .reference_end } >= primer_position { p2_position } " ,
517+ file = sys .stderr ,
518+ )
519+ except Exception as e :
520+ print (
521+ f"{ segment1 .query_name } : Problem soft masking right primer (error: { e } ), skipping" ,
503522 file = sys .stderr ,
504523 )
505524 return False
@@ -510,15 +529,26 @@ def handle_paired_segment(
510529 trim (segment2 , p2_position , True , args .verbose )
511530 if args .verbose :
512531 print (
513- "ref start %s >= primer_position %s"
514- % (segment2 .reference_start , p2_position ),
532+ f"{ segment1 .query_name } : ref_start { segment2 .reference_start } >= primer_position { p2_position } " ,
515533 file = sys .stderr ,
516534 )
517535 except Exception as e :
518536 print (
519- "problem soft masking right primer in {} (error: {}), skipping" .format (
520- segment1 .query_name , e
521- ),
537+ f"{ segment1 .query_name } : Problem soft masking right primer (error: { e } ), skipping" ,
538+ file = sys .stderr ,
539+ )
540+ return False
541+ elif segment2 .reference_start < p1_position :
542+ try :
543+ trim (segment2 , p1_position , False , args .verbose )
544+ if args .verbose :
545+ print (
546+ f"{ segment1 .query_name } : ref_end { segment2 .reference_end } >= primer_position { p1_position } " ,
547+ file = sys .stderr ,
548+ )
549+ except Exception as e :
550+ print (
551+ f"{ segment1 .query_name } : Problem soft masking left primer (error: { e } ), skipping" ,
522552 file = sys .stderr ,
523553 )
524554 return False
@@ -527,8 +557,7 @@ def handle_paired_segment(
527557 if "M" not in segment1 .cigarstring or "M" not in segment2 .cigarstring :
528558 if args .verbose :
529559 print (
530- "Paired segment: %s dropped as does not match reference post masking"
531- % (segment1 .query_name ),
560+ f"{ segment1 .query_name } : Paired segment dropped as does not match reference post masking" ,
532561 file = sys .stderr ,
533562 )
534563 return False
@@ -613,7 +642,7 @@ def normalise(trimmed_segments: dict, normalise: int, bed: list, verbose: bool =
613642 for chrom , amplicon_dict in trimmed_segments .items ():
614643 for amplicon , segments in amplicon_dict .items ():
615644 if amplicon not in amplicons [chrom ]:
616- raise ValueError (f"Segment { amplicon } not found in primer scheme file" )
645+ raise ValueError (f"Amplicon { amplicon } not found in primer scheme file" )
617646
618647 desired_depth = np .full_like (
619648 (amplicons [chrom ][amplicon ]["length" ],), normalise , dtype = int
@@ -952,8 +981,8 @@ def main():
952981 parser .add_argument (
953982 "--no-read-groups" ,
954983 dest = "no_read_groups" ,
955- action = "store_true" ,
956984 help = "Do not divide reads into groups in SAM output" ,
985+ action = "store_true" ,
957986 )
958987 parser .add_argument ("--verbose" , action = "store_true" , help = "Debug mode" )
959988 parser .add_argument ("--remove-incorrect-pairs" , action = "store_true" )
0 commit comments