Skip to content

Commit

Permalink
fix
Browse files Browse the repository at this point in the history
  • Loading branch information
nh13 committed Feb 7, 2025
1 parent acb0160 commit 4423d72
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 27 deletions.
4 changes: 1 addition & 3 deletions src/main/scala/com/fulcrumgenomics/bam/ClipBam.scala
Original file line number Diff line number Diff line change
Expand Up @@ -188,9 +188,7 @@ class ClipBam

val (numExtendingPastMateStartReadOne, numExtendingPastMateStartReadTwo) = {
if (clipBasesPastMate && r1.isFrPair) {
val clip1 = this.clipper.clipExtendingPastMateEnd(rec=r1, mateEnd=r2.end)
val clip2 = this.clipper.clipExtendingPastMateEnd(rec=r2, mateEnd=r1.end)
(clip1, clip2)
this.clipper.clipExtendingPastMateEnds(rec=r1, mate=r2)
}
else (0, 0)
}
Expand Down
42 changes: 21 additions & 21 deletions src/main/scala/com/fulcrumgenomics/bam/SamRecordClipper.scala
Original file line number Diff line number Diff line change
Expand Up @@ -333,33 +333,31 @@ class SamRecordClipper(val mode: ClippingMode, val autoClipAttributes: Boolean)
* @param rec the record to examine
*/
def numBasesExtendingPastMate(rec: SamRecord): Int = {
val mateEnd = rec.mateEnd.getOrElse(rec.start + abs(rec.insertSize) - 1)
numBasesExtendingPastMate(rec=rec, mateEnd=mateEnd)
val mateUnclippedStart = rec.mateUnclippedStart.getOrElse(rec.mateStart)
val mateUnclippedEnd = rec.mateUnclippedEnd.getOrElse(rec.start + abs(rec.insertSize) - 1)
numBasesExtendingPastMate(rec=rec, mateUnclippedStart=mateUnclippedStart, mateUnclippedEnd=mateUnclippedEnd)
}

/** Returns the number of bases extending past the mate end for FR pairs including any soft-clipped bases, zero otherwise.
*
* @param rec the record to examine
* @param mateEnd the largest mapped genomic coordinate of the mate
* @param mateUnclippedStart the smallest mapped genomic coordinate considering clipping
* @param mateUnclippedEnd the largest mapped genomic coordinate considering clipping
*/
def numBasesExtendingPastMate(rec: SamRecord, mateEnd: Int): Int = {
def numBasesExtendingPastMate(rec: SamRecord, mateUnclippedStart: Int, mateUnclippedEnd: Int): Int = {
if (!rec.isFrPair) 0 else rec.positiveStrand match {
case true if rec.end >= mateEnd =>
// account for any soft-clipping on the same side of the template
val trailingClippedBases = if (rec.end == mateEnd) Math.min(rec.cigar.trailingClippedBases, rec.mateCigar.get.trailingClippedBases) else 0
case true if rec.end >= mateUnclippedEnd =>
// positive strand record is aligned to/past the mate alignment end: count any bases aligned/softclipped after
Math.max(0, rec.length - rec.readPosAtRefPos(pos=mateEnd, returnLastBaseIfDeleted=false) - trailingClippedBases)
Math.max(0, rec.length - rec.readPosAtRefPos(pos=mateUnclippedEnd, returnLastBaseIfDeleted=false))
case true =>
// positive strand record alignment ends before the mate alignment end: remove any excess soft-clipped reads
Math.max(0, rec.cigar.trailingSoftClippedBases - (mateEnd - rec.end))
case false if rec.start > rec.mateStart =>
Math.max(0, rec.cigar.trailingSoftClippedBases - (mateUnclippedEnd - rec.end))
case false if rec.start > mateUnclippedStart =>
// negative strand record alignment starts after the mate alignment start: remove any excess soft-clipped reads
Math.max(0, rec.cigar.leadingSoftClippedBases - (rec.start - rec.mateStart))
Math.max(0, rec.cigar.leadingSoftClippedBases - (rec.start - mateUnclippedStart))
case false =>
// account for any soft-clipping on the same side of the template
val leadingClippedBases = if (rec.start == rec.mateStart) Math.min(rec.cigar.leadingClippedBases, rec.mateCigar.get.leadingClippedBases) else 0
// negative strand record alignment starts at or before the mate start: count up to and including one base before
Math.max(0, rec.readPosAtRefPos(pos=rec.mateStart, returnLastBaseIfDeleted=false) - 1 - leadingClippedBases)
Math.max(0, rec.readPosAtRefPos(pos=mateUnclippedStart, returnLastBaseIfDeleted=false) - 1)
}
}

Expand All @@ -370,8 +368,8 @@ class SamRecordClipper(val mode: ClippingMode, val autoClipAttributes: Boolean)
* @return the additional number of bases clipped (3' end in sequencing order) for the read and mate respectively
*/
def clipExtendingPastMateEnds(rec: SamRecord, mate: SamRecord): (Int, Int) = {
val basesClipped1 = clipExtendingPastMateEnd(rec=rec, mateEnd=mate.end)
val basesClipped2 = clipExtendingPastMateEnd(rec=mate, mateEnd=rec.end)
val basesClipped1 = clipExtendingPastMateEnd(rec=rec, mateUnclippedStart=mate.unclippedStart, mateUnclippedEnd=mate.unclippedEnd)
val basesClipped2 = clipExtendingPastMateEnd(rec=mate, mateUnclippedStart=rec.unclippedStart, mateUnclippedEnd=rec.unclippedEnd)
(basesClipped1, basesClipped2)
}

Expand All @@ -383,20 +381,22 @@ class SamRecordClipper(val mode: ClippingMode, val autoClipAttributes: Boolean)
* @return the additional number of bases clipped (3' end in sequencing order)
*/
def clipExtendingPastMateEnd(rec: SamRecord): Int = {
val mateEnd = rec.mateEnd.getOrElse(rec.start + abs(rec.insertSize) - 1)
clipExtendingPastMateEnd(rec=rec, mateEnd=mateEnd)
val mateUnclippedStart = rec.mateUnclippedStart.getOrElse(rec.mateStart)
val mateUnclippedEnd = rec.mateUnclippedEnd.getOrElse(rec.start + abs(rec.insertSize) - 1)
clipExtendingPastMateEnd(rec=rec, mateUnclippedStart=mateUnclippedStart, mateUnclippedEnd=mateUnclippedEnd)

Check warning on line 386 in src/main/scala/com/fulcrumgenomics/bam/SamRecordClipper.scala

View check run for this annotation

Codecov / codecov/patch

src/main/scala/com/fulcrumgenomics/bam/SamRecordClipper.scala#L384-L386

Added lines #L384 - L386 were not covered by tests
}

/** Clips the read in FR read pairs whose alignments extend beyond the far end of their mate's alignment.
*
* @param rec the record to clip
* @param mateEnd the end coordinate of the mate
* @param mateUnclippedStart the smallest mapped genomic coordinate considering clipping
* @param mateUnclippedEnd the largest mapped genomic coordinate considering clipping
* @return the additional number of bases clipped (3' end in sequencing order)
*/
def clipExtendingPastMateEnd(rec: SamRecord, mateEnd: Int): Int = {
def clipExtendingPastMateEnd(rec: SamRecord, mateUnclippedStart: Int, mateUnclippedEnd: Int): Int = {
if (!rec.isFrPair) 0 // do not overlap, don't clip
else {
val totalClippedBases = numBasesExtendingPastMate(rec=rec, mateEnd=mateEnd)
val totalClippedBases = numBasesExtendingPastMate(rec=rec, mateUnclippedStart=mateUnclippedStart, mateUnclippedEnd=mateUnclippedEnd)
if (totalClippedBases == 0) 0 else {
if (rec.positiveStrand) this.clipEndOfRead(rec, totalClippedBases)
else this.clipStartOfRead(rec, totalClippedBases)
Expand Down
45 changes: 45 additions & 0 deletions src/test/scala/com/fulcrumgenomics/bam/SamRecordClipperTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -879,4 +879,49 @@ class SamRecordClipperTest extends UnitSpec with OptionValues {
mate.start shouldBe 1
mate.cigar.toString shouldBe "40M20I40M"
}

"SamRecordClipper.numBasesExtendingPastMate" should "return zero when reads do not extend past the end or are not FR pairs" in {
val builder = new SamBuilder()
val Seq(r1, r2) = builder.addPair(start1=100, start2=200, cigar1="20S80M", cigar2="10S90M")
val Seq(r3, r4) = builder.addPair(start1=100, start2=200, cigar1="10S90M", cigar2="20S80M")
val Seq(r5, r6) = builder.addPair(start1=100, start2=100, cigar1="10S90M", cigar2="20S80M", strand1=Plus, strand2=Plus)
val Seq(r7, r8) = builder.addPair(start1=100, start2=100, cigar1="10S90M", cigar2="20S80M", strand1=Minus, strand2=Minus)

Seq(r1, r2, r3, r4, r5, r6, r7, r8).foreach { r =>
clipper(Soft).numBasesExtendingPastMate(r) shouldBe 0
}
}

it should "return a return a positive value when reads extend past its mate" in {
val builder = new SamBuilder()

// both extend 50bp beyond the mates
val Seq(r1, r2) = builder.addPair(start1=100, start2=50, cigar1="100M", cigar2="100M")
clipper(Soft).numBasesExtendingPastMate(r1) shouldBe 50
clipper(Soft).numBasesExtendingPastMate(r2) shouldBe 50

// r3 ends at 149, which is the same as the end of r4 at 149
// r4 starts at 51, which is the same as the start of r3 at 51
val Seq(r3, r4) = builder.addPair(start1=100, start2=100, cigar1="50S50M", cigar2="50S50M")
clipper(Soft).numBasesExtendingPastMate(r3) shouldBe 0
clipper(Soft).numBasesExtendingPastMate(r4) shouldBe 0

// r5 ends at 199, which is the same as the end of r6 at 199
// r6 starts at 51, which is the same as the start of 45 at 51
val Seq(r5, r6) = builder.addPair(start1=100, start2=100, cigar1="50M50S", cigar2="50M50S")
clipper(Soft).numBasesExtendingPastMate(r5) shouldBe 0
clipper(Soft).numBasesExtendingPastMate(r6) shouldBe 0

// r7 ends at 149, which is before the end of r8 at 169
// r8 starts at 71, which is after the start of r7 at 51
val Seq(r7, r8) = builder.addPair(start1=100, start2=100, cigar1="50S50M", cigar2="30S70M")
clipper(Soft).numBasesExtendingPastMate(r7) shouldBe 0
clipper(Soft).numBasesExtendingPastMate(r8) shouldBe 0

// r9 ends at 169, which is after the end of r10 at 149
// r10 starts at 51, which is before the start of r9 at 71
val Seq(r9, r10) = builder.addPair(start1=100, start2=100, cigar1="30S70M", cigar2="50S50M")
clipper(Soft).numBasesExtendingPastMate(r9) shouldBe 20
clipper(Soft).numBasesExtendingPastMate(r10) shouldBe 20
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -541,9 +541,9 @@ class VanillaUmiConsensusCallerTest extends UnitSpec with OptionValues {
val s1 = cc().toSourceRead(r1, minBaseQuality=2.toByte, trim=false).value
val s2 = cc().toSourceRead(r2, minBaseQuality=2.toByte, trim=false).value

s1.baseString shouldBe "A"*2 + "C"*38 // trimmed by ten bases at the 3' end, five due to existing soft-clipping and the other due to past mate start
s1.cigar.toString shouldBe "10S30M"
s2.baseString shouldBe "C"*2 + "G"*46 // trimmed by 2 bases at the 3' end (the "12S - 10S" in the cigars)
s1.baseString shouldBe "A"*2 + "C"*46 // end of r1 is 60, whereas the end of r2 is 58, so trim 2bp off the end of r1
s1.cigar.toString shouldBe "10S35M3S"
s2.baseString shouldBe "C"*2 + "G"*46 // start of r1 is 10, whereas the start of r2 is 8, so trim 2bp off the start of r2
s2.cigar.toString shouldBe "8S30M10S" // NB: cigar is reversed in toSourceRead
}

Expand Down

0 comments on commit 4423d72

Please sign in to comment.