Skip to content

Commit f7e6c57

Browse files
author
yash-puligundla
committed
Addressing the feedback from Oct 11, 2023, except for implementing the Stripe flag in the RANS Nx16 encoder
1 parent 43145d4 commit f7e6c57

File tree

4 files changed

+162
-169
lines changed

4 files changed

+162
-169
lines changed

scripts/install-samtools.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/bin/sh
22
set -ex
33
wget https://github.com/samtools/samtools/releases/download/1.14/samtools-1.14.tar.bz2
4-
# CRAM Interop Tests are dependent on the test files in samtools-1.14/htslib-1.14/htscodes/tests/dat
4+
# Note that the CRAM Interop Tests are dependent on the test files in samtools-1.14/htslib-1.14/htscodecs/tests/dat
55
tar -xjvf samtools-1.14.tar.bz2
66
cd samtools-1.14 && ./configure --prefix=/usr && make && sudo make install

src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java

+34-28
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
import htsjdk.samtools.cram.compression.rans.Constants;
66
import htsjdk.samtools.cram.compression.rans.RANSDecode;
77
import htsjdk.samtools.cram.compression.rans.RANSDecodingSymbol;
8-
import htsjdk.samtools.cram.compression.rans.RANSParams;
98
import htsjdk.samtools.cram.compression.rans.Utils;
109

1110
import java.nio.ByteBuffer;
@@ -17,17 +16,17 @@ public class RANSNx16Decode extends RANSDecode {
1716
private static final int FREQ_TABLE_OPTIONALLY_COMPRESSED_MASK = 0x01;
1817

1918
public ByteBuffer uncompress(final ByteBuffer inBuffer) {
19+
20+
// For RANS decoding, the bytes are read in little endian from the input stream
21+
inBuffer.order(ByteOrder.LITTLE_ENDIAN);
2022
return uncompress(inBuffer, 0);
2123
}
2224

23-
public ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) {
25+
private ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) {
2426
if (inBuffer.remaining() == 0) {
2527
return EMPTY_BUFFER;
2628
}
2729

28-
// For RANS decoding, the bytes are read in little endian from the input stream
29-
inBuffer.order(ByteOrder.LITTLE_ENDIAN);
30-
3130
// the first byte of the compressed stream gives the formatFlags
3231
final int formatFlags = inBuffer.get() & 0xFF;
3332
final RANSNx16Params ransNx16Params = new RANSNx16Params(formatFlags);
@@ -70,7 +69,7 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) {
7069
uncompressedRLEOutputLength = outSize;
7170
outSize = Utils.readUint7(inBuffer);
7271
// TODO: maybe move decodeRLEMeta in-line
73-
uncompressedRLEMetaData = decodeRLEMeta(inBuffer, ransNx16Params, uncompressedRLEMetaDataLength, rleSymbols);
72+
uncompressedRLEMetaData = decodeRLEMeta(inBuffer, uncompressedRLEMetaDataLength, rleSymbols);
7473
}
7574

7675
ByteBuffer outBuffer = ByteBuffer.allocate(outSize);
@@ -86,7 +85,7 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) {
8685
uncompressOrder0WayN(inBuffer, outBuffer, outSize, ransNx16Params);
8786
break;
8887
case ONE:
89-
uncompressOrder1WayN(inBuffer, outBuffer, outSize, ransNx16Params);
88+
uncompressOrder1WayN(inBuffer, outBuffer, ransNx16Params);
9089
break;
9190
default:
9291
throw new RuntimeException("Unknown rANS order: " + ransNx16Params.getOrder());
@@ -167,7 +166,6 @@ private ByteBuffer uncompressOrder0WayN(
167166
private ByteBuffer uncompressOrder1WayN(
168167
final ByteBuffer inBuffer,
169168
final ByteBuffer outBuffer,
170-
final int outSize,
171169
final RANSNx16Params ransNx16Params) {
172170
initializeRANSDecoder();
173171

@@ -286,7 +284,7 @@ private void readFrequencyTableOrder0(
286284

287285
private void readFrequencyTableOrder1(
288286
final ByteBuffer cp,
289-
int shift) {
287+
final int shift) {
290288
final int[][] frequencies = new int[Constants.NUMBER_OF_SYMBOLS][Constants.NUMBER_OF_SYMBOLS];
291289
final ArithmeticDecoder[] D = getD();
292290
final RANSDecodingSymbol[][] decodingSymbols = getDecodingSymbols();
@@ -349,7 +347,10 @@ private static int[] readAlphabet(final ByteBuffer cp){
349347
return alphabet;
350348
}
351349

352-
private ByteBuffer decodeRLEMeta(final ByteBuffer inBuffer , final RANSParams ransParams, final int uncompressedRLEMetaDataLength, final int[] rleSymbols) {
350+
private ByteBuffer decodeRLEMeta(
351+
final ByteBuffer inBuffer,
352+
final int uncompressedRLEMetaDataLength,
353+
final int[] rleSymbols) {
353354
ByteBuffer uncompressedRLEMetaData;
354355
final int compressedRLEMetaDataLength;
355356
if ((uncompressedRLEMetaDataLength & 0x01)!=0) {
@@ -370,15 +371,19 @@ private ByteBuffer decodeRLEMeta(final ByteBuffer inBuffer , final RANSParams ra
370371

371372
int numRLESymbols = uncompressedRLEMetaData.get() & 0xFF;
372373
if (numRLESymbols == 0) {
373-
numRLESymbols = 256;
374+
numRLESymbols = Constants.NUMBER_OF_SYMBOLS;
374375
}
375376
for (int i = 0; i< numRLESymbols; i++) {
376377
rleSymbols[uncompressedRLEMetaData.get() & 0xFF] = 1;
377378
}
378379
return uncompressedRLEMetaData;
379380
}
380381

381-
private ByteBuffer decodeRLE(ByteBuffer inBuffer , final int[] rleSymbols, final ByteBuffer uncompressedRLEMetaData, int uncompressedRLEOutputLength) {
382+
private ByteBuffer decodeRLE(
383+
ByteBuffer inBuffer,
384+
final int[] rleSymbols,
385+
final ByteBuffer uncompressedRLEMetaData,
386+
final int uncompressedRLEOutputLength) {
382387
ByteBuffer rleOutBuffer = ByteBuffer.allocate(uncompressedRLEOutputLength);
383388
int j = 0;
384389
for(int i = 0; j< uncompressedRLEOutputLength; i++){
@@ -396,7 +401,11 @@ private ByteBuffer decodeRLE(ByteBuffer inBuffer , final int[] rleSymbols, final
396401
return inBuffer;
397402
}
398403

399-
private ByteBuffer decodePack(ByteBuffer inBuffer, final int[] packMappingTable, int numSymbols, int uncompressedPackOutputLength) {
404+
private ByteBuffer decodePack(
405+
ByteBuffer inBuffer,
406+
final int[] packMappingTable,
407+
final int numSymbols,
408+
final int uncompressedPackOutputLength) {
400409
ByteBuffer outBufferPack = ByteBuffer.allocate(uncompressedPackOutputLength);
401410
int j = 0;
402411

@@ -445,38 +454,35 @@ else if (numSymbols <= 16){
445454
return inBuffer;
446455
}
447456

448-
private ByteBuffer decodeStripe(ByteBuffer inBuffer, final int outSize){
449-
457+
private ByteBuffer decodeStripe(final ByteBuffer inBuffer, final int outSize){
450458
final int numInterleaveStreams = inBuffer.get() & 0xFF;
451459

452460
// retrieve lengths of compressed interleaved streams
453-
int[] clen = new int[numInterleaveStreams];
461+
final int[] compressedLengths = new int[numInterleaveStreams];
454462
for ( int j=0; j<numInterleaveStreams; j++ ){
455-
clen[j] = Utils.readUint7(inBuffer);
463+
compressedLengths[j] = Utils.readUint7(inBuffer);
456464
}
457465

458466
// Decode the compressed interleaved stream
459-
int[] ulen = new int[numInterleaveStreams];
460-
ByteBuffer[] T = new ByteBuffer[numInterleaveStreams];
461-
467+
final int[] uncompressedLengths = new int[numInterleaveStreams];
468+
final ByteBuffer[] TransposedData = new ByteBuffer[numInterleaveStreams];
462469
for ( int j=0; j<numInterleaveStreams; j++){
463-
ulen[j] = (int) Math.floor(((double) outSize)/numInterleaveStreams);
470+
uncompressedLengths[j] = (int) Math.floor(((double) outSize)/numInterleaveStreams);
464471
if ((outSize % numInterleaveStreams) > j){
465-
ulen[j]++;
472+
uncompressedLengths[j]++;
466473
}
467474

468-
T[j] = uncompress(inBuffer, ulen[j]);
475+
TransposedData[j] = uncompress(inBuffer, uncompressedLengths[j]);
469476
}
470477

471478
// Transpose
472-
ByteBuffer out = ByteBuffer.allocate(outSize);
479+
final ByteBuffer outBuffer = ByteBuffer.allocate(outSize);
473480
for (int j = 0; j <numInterleaveStreams; j++) {
474-
for (int i = 0; i < ulen[j]; i++) {
475-
out.put((i*numInterleaveStreams)+j, T[j].get(i));
481+
for (int i = 0; i < uncompressedLengths[j]; i++) {
482+
outBuffer.put((i*numInterleaveStreams)+j, TransposedData[j].get(i));
476483
}
477484
}
478-
479-
return out;
485+
return outBuffer;
480486
}
481487

482488
}

src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java

+13-18
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,7 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params ransN
3030
// NoSize
3131
if (!ransNx16Params.isNosz()) {
3232
// the NoSize flag is not set, so the original (uncompressed) size is recorded in the output
33-
int insize = inBuffer.remaining();
34-
Utils.writeUint7(insize,outBuffer);
33+
Utils.writeUint7(inBuffer.remaining(),outBuffer);
3534
}
3635

3736
ByteBuffer inputBuffer = inBuffer;
@@ -68,7 +67,7 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params ransN
6867

6968
// RLE
7069
if (ransNx16Params.isRLE()){
71-
inputBuffer = encodeRLE(inputBuffer, ransNx16Params, outBuffer);
70+
inputBuffer = encodeRLE(inputBuffer, outBuffer);
7271
}
7372

7473

@@ -480,13 +479,10 @@ private static void writeAlphabet(final ByteBuffer cp, final int[] F) {
480479
}
481480

482481
private void buildSymsOrder0(final int[] F) {
483-
final RANSEncodingSymbol[] syms = getEncodingSymbols()[0];
484-
// updates the RANSEncodingSymbol array for all the symbols
485482

486-
// TODO: commented out to suppress spotBugs warning
487-
//final int[] C = new int[Constants.NUMBER_OF_SYMBOLS];
483+
// updates all the encodingSymbols
484+
final RANSEncodingSymbol[] syms = getEncodingSymbols()[0];
488485

489-
// T = running sum of frequencies including the current symbol
490486
// F[j] = frequency of symbol "j"
491487
// cumulativeFreq = cumulative frequency of all the symbols preceding "j" (excluding the frequency of symbol "j")
492488
int cumulativeFreq = 0;
@@ -515,44 +511,44 @@ private void buildSymsOrder1(final int[][] F) {
515511
}
516512
}
517513

518-
private ByteBuffer encodeRLE(final ByteBuffer inBuffer ,final RANSParams ransParams, final ByteBuffer outBuffer){
514+
private ByteBuffer encodeRLE(final ByteBuffer inBuffer, final ByteBuffer outBuffer){
519515

520516
// Find the symbols that benefit from RLE, i.e., the symbols that occur more than 2 times in succession.
521517
// spec: For symbols that occur many times in succession, we can replace them with a single symbol and a count.
522-
final int[] rleSymbols = new int[Constants.NUMBER_OF_SYMBOLS];
518+
final int[] runCounts = new int[Constants.NUMBER_OF_SYMBOLS];
523519
int inputSize = inBuffer.remaining();
524520

525521
int lastSymbol = -1;
526522
for (int i = 0; i < inputSize; i++) {
527523
int currentSymbol = inBuffer.get(i)&0xFF;
528-
rleSymbols[currentSymbol] += (currentSymbol==lastSymbol ? 1:-1);
524+
runCounts[currentSymbol] += (currentSymbol==lastSymbol ? 1:-1);
529525
lastSymbol = currentSymbol;
530526
}
531527

532528
// numRLESymbols is the number of symbols that are run length encoded
533529
int numRLESymbols = 0;
534530
for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) {
535-
if (rleSymbols[i]>0) {
531+
if (runCounts[i]>0) {
536532
numRLESymbols++;
537533
}
538534
}
539535

540536
if (numRLESymbols==0) {
541537
// Format cannot cope with zero RLE symbols, so pick one!
542538
numRLESymbols = 1;
543-
rleSymbols[0] = 1;
539+
runCounts[0] = 1;
544540
}
545541

546542
// create rleMetaData buffer to store rle metadata.
547543
// This buffer will be compressed using compressOrder0WayN towards the end of this method
548544
// TODO: How did we come up with this calculation for Buffer size? numRLESymbols+1+inputSize
549545
ByteBuffer rleMetaData = ByteBuffer.allocate(numRLESymbols+1+inputSize); // rleMetaData
550546

551-
// write number of symbols that are run length encoded to the outBuffer
547+
// write number of symbols that are run length encoded
552548
rleMetaData.put((byte) numRLESymbols);
553549

554-
for (int i=0; i<256; i++){
555-
if (rleSymbols[i] >0){
550+
for (int i=0; i<Constants.NUMBER_OF_SYMBOLS; i++){
551+
if (runCounts[i] >0){
556552
// write the symbols that are run length encoded
557553
rleMetaData.put((byte) i);
558554
}
@@ -566,7 +562,7 @@ private ByteBuffer encodeRLE(final ByteBuffer inBuffer ,final RANSParams ransPar
566562

567563
for (int i = 0; i < inputSize; i++) {
568564
encodedData.put(encodedDataIdx++,inBuffer.get(i));
569-
if (rleSymbols[inBuffer.get(i)&0xFF]>0) {
565+
if (runCounts[inBuffer.get(i)&0xFF]>0) {
570566
lastSymbol = inBuffer.get(i) & 0xFF;
571567
int run = 0;
572568

@@ -585,7 +581,6 @@ private ByteBuffer encodeRLE(final ByteBuffer inBuffer ,final RANSParams ransPar
585581

586582
encodedData.limit(encodedDataIdx);
587583
// limit and rewind
588-
// TODO: check if position of rleMetadata is at the end of the buffer as expected
589584
rleMetaData.limit(rleMetaData.position());
590585
rleMetaData.rewind();
591586

0 commit comments

Comments
 (0)