diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/Constants.java b/src/main/java/htsjdk/samtools/cram/compression/rans/Constants.java index 2d35c60635..3ae46a76a1 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/Constants.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/Constants.java @@ -3,7 +3,12 @@ final public class Constants { public static final int TOTAL_FREQ_SHIFT = 12; public static final int TOTAL_FREQ = (1 << TOTAL_FREQ_SHIFT); // 4096 + public static final int NUMBER_OF_SYMBOLS = 256; public static final int RANS_4x8_LOWER_BOUND = 1 << 23; + public static final int RANS_4x8_NUM_INTERLEAVED_STREAMS = 4; + public static final int RANS_4x8_ORDER_BYTE_LENGTH = 1; + public static final int RANS_4x8_COMPRESSED_BYTE_LENGTH = 4; + public static final int RANS_4x8_RAW_BYTE_LENGTH = 4; + public static final int RANS_4x8_PREFIX_BYTE_LENGTH = RANS_4x8_ORDER_BYTE_LENGTH + RANS_4x8_COMPRESSED_BYTE_LENGTH + RANS_4x8_RAW_BYTE_LENGTH; public static final int RANS_Nx16_LOWER_BOUND = 1 << 15; - public static final int NUMBER_OF_SYMBOLS = 256; } \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java index 6f21539500..d6763ed6f6 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java @@ -47,6 +47,26 @@ protected ByteBuffer allocateOutputBuffer(final int inSize) { return outputBuffer; } - //TODO: add buildSymbols0 and buildSymbols1 + protected void buildSymsOrder0(final int[] frequencies) { + updateEncodingSymbols(frequencies, getEncodingSymbols()[0]); + } + + protected void buildSymsOrder1(final int[][] frequencies) { + final RANSEncodingSymbol[][] encodingSymbols = getEncodingSymbols(); + for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { + updateEncodingSymbols(frequencies[i], encodingSymbols[i]); + } + } + + private void updateEncodingSymbols(int[] frequencies, RANSEncodingSymbol[] encodingSymbols) { + int cumulativeFreq = 0; + for (int symbol = 0; symbol < Constants.NUMBER_OF_SYMBOLS; symbol++) { + if (frequencies[symbol] != 0) { + //For each symbol, set start = cumulative frequency and freq = frequencies[symbol] + encodingSymbols[symbol].set(cumulativeFreq, frequencies[symbol], Constants.TOTAL_FREQ_SHIFT); + cumulativeFreq += frequencies[symbol]; + } + } + } } \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java index 54d19a5e17..5f93ae58c4 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java @@ -22,12 +22,12 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer) { return EMPTY_BUFFER; } - // first byte of compressed stream gives order - final RANSParams.ORDER order = RANSParams.ORDER.fromInt(inBuffer.get()); - // For RANS decoding, the bytes are read in little endian from the input stream inBuffer.order(ByteOrder.LITTLE_ENDIAN); + // first byte of compressed stream gives order + final RANSParams.ORDER order = RANSParams.ORDER.fromInt(inBuffer.get()); + // compressed bytes length final int inSize = inBuffer.getInt(); if (inSize != inBuffer.remaining() - RAW_BYTE_LENGTH) { @@ -132,7 +132,6 @@ private void uncompressOrder1Way4(final ByteBuffer inBuffer, final ByteBuffer ou final int out_sz = outBuffer.remaining(); long rans0, rans1, rans2, rans7; - inBuffer.order(ByteOrder.LITTLE_ENDIAN); rans0 = inBuffer.getInt(); rans1 = inBuffer.getInt(); rans2 = inBuffer.getInt(); diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java index 20331f9c69..0d962baf82 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java @@ -11,17 +11,12 @@ import java.nio.ByteOrder; public class RANS4x8Encode extends RANSEncode { - private static final int ORDER_BYTE_LENGTH = 1; - private static final int COMPRESSED_BYTE_LENGTH = 4; - private static final int RAW_BYTE_LENGTH = 4; - private static final int PREFIX_BYTE_LENGTH = ORDER_BYTE_LENGTH + COMPRESSED_BYTE_LENGTH + RAW_BYTE_LENGTH; // streams smaller than this value don't have sufficient symbol context for ORDER-1 encoding, // so always use ORDER-0 private static final int MINIMUM__ORDER_1_SIZE = 4; private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); - public ByteBuffer compress(final ByteBuffer inBuffer, final RANS4x8Params params) { if (inBuffer.remaining() == 0) { return EMPTY_BUFFER; @@ -45,23 +40,22 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANS4x8Params params } private ByteBuffer compressOrder0Way4(final ByteBuffer inBuffer) { - final int inSize = inBuffer.remaining(); - final ByteBuffer outBuffer = allocateOutputBuffer(inSize); + final int inputSize = inBuffer.remaining(); + final ByteBuffer outBuffer = allocateOutputBuffer(inputSize); // move the output buffer ahead to the start of the frequency table (we'll come back and // write the output stream prefix at the end of this method) - outBuffer.position(PREFIX_BYTE_LENGTH); // start of frequency table + outBuffer.position(Constants.RANS_4x8_PREFIX_BYTE_LENGTH); // start of frequency table // get the normalised frequencies of the alphabets - final int[] F = calcFrequenciesOrder0(inBuffer); + final int[] normalizedFreq = calcFrequenciesOrder0(inBuffer); // using the normalised frequencies, set the RANSEncodingSymbols - buildSymsOrder0(F); - + buildSymsOrder0(normalizedFreq); final ByteBuffer cp = outBuffer.slice(); // write Frequency table - final int frequencyTableSize = writeFrequenciesOrder0(cp, F); + final int frequencyTableSize = writeFrequenciesOrder0(cp, normalizedFreq); inBuffer.rewind(); @@ -108,7 +102,7 @@ private ByteBuffer compressOrder0Way4(final ByteBuffer inBuffer) { inBuffer.position(inBuffer.limit()); // write the prefix at the beginning of the output buffer - writeCompressionPrefix(RANSParams.ORDER.ZERO, outBuffer, inSize, frequencyTableSize, cdata_size); + writeCompressionPrefix(RANSParams.ORDER.ZERO, outBuffer, inputSize, frequencyTableSize, cdata_size); return outBuffer; } @@ -117,16 +111,16 @@ private ByteBuffer compressOrder1Way4(final ByteBuffer inBuffer) { final ByteBuffer outBuffer = allocateOutputBuffer(inSize); // move to start of frequency - outBuffer.position(PREFIX_BYTE_LENGTH); + outBuffer.position(Constants.RANS_4x8_PREFIX_BYTE_LENGTH); // get normalized frequencies - final int[][] F = calcFrequenciesOrder1(inBuffer); + final int[][] normalizedFreq = calcFrequenciesOrder1(inBuffer); // using the normalised frequencies, set the RANSEncodingSymbols - buildSymsOrder1(F); + buildSymsOrder1(normalizedFreq); final ByteBuffer cp = outBuffer.slice(); - final int frequencyTableSize = writeFrequenciesOrder1(cp, F); + final int frequencyTableSize = writeFrequenciesOrder1(cp, normalizedFreq); inBuffer.rewind(); final int in_size = inBuffer.remaining(); long rans0, rans1, rans2, rans3; @@ -214,16 +208,16 @@ private static void writeCompressionPrefix( final int frequencyTableSize, final int compressedBlobSize) { ValidationUtils.validateArg(order == RANSParams.ORDER.ONE || order == RANSParams.ORDER.ZERO,"unrecognized RANS order"); - outBuffer.limit(PREFIX_BYTE_LENGTH + frequencyTableSize + compressedBlobSize); + outBuffer.limit(Constants.RANS_4x8_PREFIX_BYTE_LENGTH + frequencyTableSize + compressedBlobSize); // go back to the beginning of the stream and write the prefix values // write the (ORDER as a single byte at offset 0) outBuffer.put(0, (byte) (order == RANSParams.ORDER.ZERO ? 0 : 1)); outBuffer.order(ByteOrder.LITTLE_ENDIAN); // move past the ORDER and write the compressed size - outBuffer.putInt(ORDER_BYTE_LENGTH, frequencyTableSize + compressedBlobSize); + outBuffer.putInt(Constants.RANS_4x8_ORDER_BYTE_LENGTH, frequencyTableSize + compressedBlobSize); // move past the compressed size and write the uncompressed size - outBuffer.putInt(ORDER_BYTE_LENGTH + COMPRESSED_BYTE_LENGTH, inSize); + outBuffer.putInt(Constants.RANS_4x8_ORDER_BYTE_LENGTH + Constants.RANS_4x8_COMPRESSED_BYTE_LENGTH, inSize); outBuffer.rewind(); } @@ -333,36 +327,6 @@ private static int[][] calcFrequenciesOrder1(final ByteBuffer in) { return F; } - private void buildSymsOrder0(final int[] F) { - final RANSEncodingSymbol[] encodingSymbols = getEncodingSymbols()[0]; - - // T = running sum of frequencies including the current symbol - // F[j] = frequency of symbol "j" - // C[j] = cumulative frequency of all the symbols preceding "j" (and excluding the frequency of symbol "j") - int cumulativeFreq = 0; - for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { - if (F[j] != 0) { - //For each symbol, set start = cumulative frequency and freq = frequency - encodingSymbols[j].set(cumulativeFreq, F[j], Constants.TOTAL_FREQ_SHIFT); - cumulativeFreq += F[j]; - } - } - } - - private void buildSymsOrder1(final int[][] F) { - final RANSEncodingSymbol[][] encodingSymbols = getEncodingSymbols(); - for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { - final int[] F_i_ = F[i]; - int cumulativeFreq = 0; - for (int symbol = 0; symbol < Constants.NUMBER_OF_SYMBOLS; symbol++) { - if (F_i_[symbol] != 0) { - encodingSymbols[i][symbol].set(cumulativeFreq, F_i_[symbol], Constants.TOTAL_FREQ_SHIFT); - cumulativeFreq += F_i_[symbol]; - } - } - } - } - private static int writeFrequenciesOrder0(final ByteBuffer cp, final int[] F) { final int start = cp.position(); diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index e493dd5364..afc0eee3ba 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -197,7 +197,6 @@ private void uncompressOrder1WayN( final int shift = frequencyTableFirstByte >> 4; readFrequencyTableOrder1(freqTableSource, shift); final int outputSize = outBuffer.remaining(); - inBuffer.order(ByteOrder.LITTLE_ENDIAN); // Nway parallel rans states. Nway = 4 or 32 final int Nway = ransNx16Params.getNumInterleavedRANSStates(); @@ -460,21 +459,21 @@ private ByteBuffer decodeStripe(final ByteBuffer inBuffer, final int outSize){ // Decode the compressed interleaved stream final int[] uncompressedLengths = new int[numInterleaveStreams]; - final ByteBuffer[] TransposedData = new ByteBuffer[numInterleaveStreams]; + final ByteBuffer[] transposedData = new ByteBuffer[numInterleaveStreams]; for ( int j=0; j j){ uncompressedLengths[j]++; } - TransposedData[j] = uncompress(inBuffer, uncompressedLengths[j]); + transposedData[j] = uncompress(inBuffer, uncompressedLengths[j]); } // Transpose final ByteBuffer outBuffer = ByteBuffer.allocate(outSize); for (int j = 0; j > 2) : (inputSize >> 5); int remainingSize = inputSize - (interleaveSize * Nway); int reverseIndex = 1; + final long[] rans = new long[Nway]; + // initialize rans states + for (int r=0; r0){ // encode remaining elements first - int remainingSymbol =0xFF & inBuffer.get(inputSize - reverseIndex); + int remainingSymbol = 0xFF & inBuffer.get(inputSize - reverseIndex); rans[remainingSize - 1] = ransEncodingSymbols[remainingSymbol].putSymbolNx16(rans[remainingSize - 1], ptr); remainingSize --; reverseIndex ++; @@ -244,8 +243,8 @@ private void compressOrder1WayN ( // normalise frequencies with a constant shift Utils.normaliseFrequenciesOrder1Shift(frequencies, Constants.TOTAL_FREQ_SHIFT); - // set encoding symbol - buildSymsOrder1(frequencies); // TODO: move into utils + // using the normalised frequencies, set the RANSEncodingSymbols + buildSymsOrder1(frequencies); // uncompress for Nway = 4. then extend Nway to be variable - 4 or 32 final int Nway = ransNx16Params.getNumInterleavedRANSStates(); @@ -469,39 +468,6 @@ private static void writeAlphabet(final ByteBuffer cp, final int[] F) { cp.put((byte) 0); } - private void buildSymsOrder0(final int[] F) { - - // updates all the encodingSymbols - final RANSEncodingSymbol[] syms = getEncodingSymbols()[0]; - - // F[j] = frequency of symbol "j" - // cumulativeFreq = cumulative frequency of all the symbols preceding "j" (excluding the frequency of symbol "j") - int cumulativeFreq = 0; - for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { - if (F[j] != 0) { - - //For each symbol, set start = cumulative frequency and freq = frequency - syms[j].set(cumulativeFreq, F[j], Constants.TOTAL_FREQ_SHIFT); - cumulativeFreq += F[j]; - } - } - } - - private void buildSymsOrder1(final int[][] F) { - // TODO: Call buildSymsOrder0 from buildSymsOrder1 - final RANSEncodingSymbol[][] encodingSymbols = getEncodingSymbols(); - for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { - final int[] F_i_ = F[i]; - int cumulativeFreq = 0; - for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { - if (F_i_[j] != 0) { - encodingSymbols[i][j].set(cumulativeFreq, F_i_[j], Constants.TOTAL_FREQ_SHIFT); - cumulativeFreq += F_i_[j]; - } - } - } - } - private ByteBuffer encodeRLE(final ByteBuffer inBuffer, final ByteBuffer outBuffer){ // Find the symbols that benefit from RLE, i.e, the symbols that occur more than 2 times in succession. diff --git a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java index 0ba31a4c4b..78ce092ff5 100644 --- a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java +++ b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java @@ -186,7 +186,7 @@ public void testRans4x8BuffersMeetBoundaryExpectations( final ByteBuffer compressed = ransBufferMeetBoundaryExpectations(rawSize,rawData,ransEncode, ransDecode,params); Assert.assertTrue(compressed.limit() > 10); Assert.assertEquals(compressed.get(), (byte) params.getOrder().ordinal()); - Assert.assertEquals(compressed.getInt(), compressed.limit() - 1 - 4 - 4); + Assert.assertEquals(compressed.getInt(), compressed.limit() - Constants.RANS_4x8_PREFIX_BYTE_LENGTH); Assert.assertEquals(compressed.getInt(), rawSize); } @@ -234,7 +234,7 @@ public void testRans4x8Header( // first byte of compressed data gives the order Assert.assertEquals(compressed.get(), (byte) params.getOrder().ordinal()); // the next 4 bytes gives the compressed size - Assert.assertEquals(compressed.getInt(), compressed.limit() - 9); + Assert.assertEquals(compressed.getInt(), compressed.limit() - Constants.RANS_4x8_PREFIX_BYTE_LENGTH); // the next 4 bytes gives the uncompressed size Assert.assertEquals(compressed.getInt(), rawData.limit()); }