Skip to content

Commit d2802b1

Browse files
author
yash-puligundla
committed
Addressing the feedback from Nov 7 and Nov 20 - part 2
1 parent 7126507 commit d2802b1

File tree

7 files changed

+58
-105
lines changed

7 files changed

+58
-105
lines changed

src/main/java/htsjdk/samtools/cram/compression/rans/Constants.java

+6-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,12 @@
33
final public class Constants {
44
public static final int TOTAL_FREQ_SHIFT = 12;
55
public static final int TOTAL_FREQ = (1 << TOTAL_FREQ_SHIFT); // 4096
6+
public static final int NUMBER_OF_SYMBOLS = 256;
67
public static final int RANS_4x8_LOWER_BOUND = 1 << 23;
8+
public static final int RANS_4x8_NUM_INTERLEAVED_STREAMS = 4;
9+
public static final int RANS_4x8_ORDER_BYTE_LENGTH = 1;
10+
public static final int RANS_4x8_COMPRESSED_BYTE_LENGTH = 4;
11+
public static final int RANS_4x8_RAW_BYTE_LENGTH = 4;
12+
public static final int RANS_4x8_PREFIX_BYTE_LENGTH = RANS_4x8_ORDER_BYTE_LENGTH + RANS_4x8_COMPRESSED_BYTE_LENGTH + RANS_4x8_RAW_BYTE_LENGTH;
713
public static final int RANS_Nx16_LOWER_BOUND = 1 << 15;
8-
public static final int NUMBER_OF_SYMBOLS = 256;
914
}

src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java

+21-1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,26 @@ protected ByteBuffer allocateOutputBuffer(final int inSize) {
4747
return outputBuffer;
4848
}
4949

50-
//TODO: add buildSymbols0 and buildSymbols1
50+
protected void buildSymsOrder0(final int[] frequencies) {
51+
updateEncodingSymbols(frequencies, getEncodingSymbols()[0]);
52+
}
53+
54+
protected void buildSymsOrder1(final int[][] frequencies) {
55+
final RANSEncodingSymbol[][] encodingSymbols = getEncodingSymbols();
56+
for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) {
57+
updateEncodingSymbols(frequencies[i], encodingSymbols[i]);
58+
}
59+
}
60+
61+
private void updateEncodingSymbols(int[] frequencies, RANSEncodingSymbol[] encodingSymbols) {
62+
int cumulativeFreq = 0;
63+
for (int symbol = 0; symbol < Constants.NUMBER_OF_SYMBOLS; symbol++) {
64+
if (frequencies[symbol] != 0) {
65+
//For each symbol, set start = cumulative frequency and freq = frequencies[symbol]
66+
encodingSymbols[symbol].set(cumulativeFreq, frequencies[symbol], Constants.TOTAL_FREQ_SHIFT);
67+
cumulativeFreq += frequencies[symbol];
68+
}
69+
}
70+
}
5171

5272
}

src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java

+3-4
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,12 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer) {
2222
return EMPTY_BUFFER;
2323
}
2424

25-
// first byte of compressed stream gives order
26-
final RANSParams.ORDER order = RANSParams.ORDER.fromInt(inBuffer.get());
27-
2825
// For RANS decoding, the bytes are read in little endian from the input stream
2926
inBuffer.order(ByteOrder.LITTLE_ENDIAN);
3027

28+
// first byte of compressed stream gives order
29+
final RANSParams.ORDER order = RANSParams.ORDER.fromInt(inBuffer.get());
30+
3131
// compressed bytes length
3232
final int inSize = inBuffer.getInt();
3333
if (inSize != inBuffer.remaining() - RAW_BYTE_LENGTH) {
@@ -132,7 +132,6 @@ private void uncompressOrder1Way4(final ByteBuffer inBuffer, final ByteBuffer ou
132132

133133
final int out_sz = outBuffer.remaining();
134134
long rans0, rans1, rans2, rans7;
135-
inBuffer.order(ByteOrder.LITTLE_ENDIAN);
136135
rans0 = inBuffer.getInt();
137136
rans1 = inBuffer.getInt();
138137
rans2 = inBuffer.getInt();

src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java

+14-50
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,12 @@
1111
import java.nio.ByteOrder;
1212

1313
public class RANS4x8Encode extends RANSEncode<RANS4x8Params> {
14-
private static final int ORDER_BYTE_LENGTH = 1;
15-
private static final int COMPRESSED_BYTE_LENGTH = 4;
16-
private static final int RAW_BYTE_LENGTH = 4;
17-
private static final int PREFIX_BYTE_LENGTH = ORDER_BYTE_LENGTH + COMPRESSED_BYTE_LENGTH + RAW_BYTE_LENGTH;
1814

1915
// streams smaller than this value don't have sufficient symbol context for ORDER-1 encoding,
2016
// so always use ORDER-0
2117
private static final int MINIMUM__ORDER_1_SIZE = 4;
2218
private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0);
2319

24-
2520
public ByteBuffer compress(final ByteBuffer inBuffer, final RANS4x8Params params) {
2621
if (inBuffer.remaining() == 0) {
2722
return EMPTY_BUFFER;
@@ -45,23 +40,22 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANS4x8Params params
4540
}
4641

4742
private ByteBuffer compressOrder0Way4(final ByteBuffer inBuffer) {
48-
final int inSize = inBuffer.remaining();
49-
final ByteBuffer outBuffer = allocateOutputBuffer(inSize);
43+
final int inputSize = inBuffer.remaining();
44+
final ByteBuffer outBuffer = allocateOutputBuffer(inputSize);
5045

5146
// move the output buffer ahead to the start of the frequency table (we'll come back and
5247
// write the output stream prefix at the end of this method)
53-
outBuffer.position(PREFIX_BYTE_LENGTH); // start of frequency table
48+
outBuffer.position(Constants.RANS_4x8_PREFIX_BYTE_LENGTH); // start of frequency table
5449

5550
// get the normalised frequencies of the symbols in the alphabet
56-
final int[] F = calcFrequenciesOrder0(inBuffer);
51+
final int[] normalizedFreq = calcFrequenciesOrder0(inBuffer);
5752

5853
// using the normalised frequencies, set the RANSEncodingSymbols
59-
buildSymsOrder0(F);
60-
54+
buildSymsOrder0(normalizedFreq);
6155
final ByteBuffer cp = outBuffer.slice();
6256

6357
// write Frequency table
64-
final int frequencyTableSize = writeFrequenciesOrder0(cp, F);
58+
final int frequencyTableSize = writeFrequenciesOrder0(cp, normalizedFreq);
6559

6660
inBuffer.rewind();
6761

@@ -108,7 +102,7 @@ private ByteBuffer compressOrder0Way4(final ByteBuffer inBuffer) {
108102
inBuffer.position(inBuffer.limit());
109103

110104
// write the prefix at the beginning of the output buffer
111-
writeCompressionPrefix(RANSParams.ORDER.ZERO, outBuffer, inSize, frequencyTableSize, cdata_size);
105+
writeCompressionPrefix(RANSParams.ORDER.ZERO, outBuffer, inputSize, frequencyTableSize, cdata_size);
112106
return outBuffer;
113107
}
114108

@@ -117,16 +111,16 @@ private ByteBuffer compressOrder1Way4(final ByteBuffer inBuffer) {
117111
final ByteBuffer outBuffer = allocateOutputBuffer(inSize);
118112

119113
// move to the start of the frequency table
120-
outBuffer.position(PREFIX_BYTE_LENGTH);
114+
outBuffer.position(Constants.RANS_4x8_PREFIX_BYTE_LENGTH);
121115

122116
// get normalized frequencies
123-
final int[][] F = calcFrequenciesOrder1(inBuffer);
117+
final int[][] normalizedFreq = calcFrequenciesOrder1(inBuffer);
124118

125119
// using the normalised frequencies, set the RANSEncodingSymbols
126-
buildSymsOrder1(F);
120+
buildSymsOrder1(normalizedFreq);
127121

128122
final ByteBuffer cp = outBuffer.slice();
129-
final int frequencyTableSize = writeFrequenciesOrder1(cp, F);
123+
final int frequencyTableSize = writeFrequenciesOrder1(cp, normalizedFreq);
130124
inBuffer.rewind();
131125
final int in_size = inBuffer.remaining();
132126
long rans0, rans1, rans2, rans3;
@@ -214,16 +208,16 @@ private static void writeCompressionPrefix(
214208
final int frequencyTableSize,
215209
final int compressedBlobSize) {
216210
ValidationUtils.validateArg(order == RANSParams.ORDER.ONE || order == RANSParams.ORDER.ZERO,"unrecognized RANS order");
217-
outBuffer.limit(PREFIX_BYTE_LENGTH + frequencyTableSize + compressedBlobSize);
211+
outBuffer.limit(Constants.RANS_4x8_PREFIX_BYTE_LENGTH + frequencyTableSize + compressedBlobSize);
218212

219213
// go back to the beginning of the stream and write the prefix values
220214
// write the ORDER (as a single byte at offset 0)
221215
outBuffer.put(0, (byte) (order == RANSParams.ORDER.ZERO ? 0 : 1));
222216
outBuffer.order(ByteOrder.LITTLE_ENDIAN);
223217
// move past the ORDER and write the compressed size
224-
outBuffer.putInt(ORDER_BYTE_LENGTH, frequencyTableSize + compressedBlobSize);
218+
outBuffer.putInt(Constants.RANS_4x8_ORDER_BYTE_LENGTH, frequencyTableSize + compressedBlobSize);
225219
// move past the compressed size and write the uncompressed size
226-
outBuffer.putInt(ORDER_BYTE_LENGTH + COMPRESSED_BYTE_LENGTH, inSize);
220+
outBuffer.putInt(Constants.RANS_4x8_ORDER_BYTE_LENGTH + Constants.RANS_4x8_COMPRESSED_BYTE_LENGTH, inSize);
227221
outBuffer.rewind();
228222
}
229223

@@ -333,36 +327,6 @@ private static int[][] calcFrequenciesOrder1(final ByteBuffer in) {
333327
return F;
334328
}
335329

336-
private void buildSymsOrder0(final int[] F) {
337-
final RANSEncodingSymbol[] encodingSymbols = getEncodingSymbols()[0];
338-
339-
// T = running sum of frequencies including the current symbol
340-
// F[j] = frequency of symbol "j"
341-
// C[j] = cumulative frequency of all the symbols preceding "j" (and excluding the frequency of symbol "j")
342-
int cumulativeFreq = 0;
343-
for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) {
344-
if (F[j] != 0) {
345-
//For each symbol, set start = cumulative frequency and freq = frequency
346-
encodingSymbols[j].set(cumulativeFreq, F[j], Constants.TOTAL_FREQ_SHIFT);
347-
cumulativeFreq += F[j];
348-
}
349-
}
350-
}
351-
352-
private void buildSymsOrder1(final int[][] F) {
353-
final RANSEncodingSymbol[][] encodingSymbols = getEncodingSymbols();
354-
for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) {
355-
final int[] F_i_ = F[i];
356-
int cumulativeFreq = 0;
357-
for (int symbol = 0; symbol < Constants.NUMBER_OF_SYMBOLS; symbol++) {
358-
if (F_i_[symbol] != 0) {
359-
encodingSymbols[i][symbol].set(cumulativeFreq, F_i_[symbol], Constants.TOTAL_FREQ_SHIFT);
360-
cumulativeFreq += F_i_[symbol];
361-
}
362-
}
363-
}
364-
}
365-
366330
private static int writeFrequenciesOrder0(final ByteBuffer cp, final int[] F) {
367331
final int start = cp.position();
368332

src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java

+3-4
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,6 @@ private void uncompressOrder1WayN(
197197
final int shift = frequencyTableFirstByte >> 4;
198198
readFrequencyTableOrder1(freqTableSource, shift);
199199
final int outputSize = outBuffer.remaining();
200-
inBuffer.order(ByteOrder.LITTLE_ENDIAN);
201200

202201
// Nway parallel rans states. Nway = 4 or 32
203202
final int Nway = ransNx16Params.getNumInterleavedRANSStates();
@@ -460,21 +459,21 @@ private ByteBuffer decodeStripe(final ByteBuffer inBuffer, final int outSize){
460459

461460
// Decode the compressed interleaved stream
462461
final int[] uncompressedLengths = new int[numInterleaveStreams];
463-
final ByteBuffer[] TransposedData = new ByteBuffer[numInterleaveStreams];
462+
final ByteBuffer[] transposedData = new ByteBuffer[numInterleaveStreams];
464463
for ( int j=0; j<numInterleaveStreams; j++){
465464
uncompressedLengths[j] = (int) Math.floor(((double) outSize)/numInterleaveStreams);
466465
if ((outSize % numInterleaveStreams) > j){
467466
uncompressedLengths[j]++;
468467
}
469468

470-
TransposedData[j] = uncompress(inBuffer, uncompressedLengths[j]);
469+
transposedData[j] = uncompress(inBuffer, uncompressedLengths[j]);
471470
}
472471

473472
// Transpose
474473
final ByteBuffer outBuffer = ByteBuffer.allocate(outSize);
475474
for (int j = 0; j <numInterleaveStreams; j++) {
476475
for (int i = 0; i < uncompressedLengths[j]; i++) {
477-
outBuffer.put((i*numInterleaveStreams)+j, TransposedData[j].get(i));
476+
outBuffer.put((i*numInterleaveStreams)+j, transposedData[j].get(i));
478477
}
479478
}
480479
return outBuffer;

src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java

+9-43
Original file line numberDiff line numberDiff line change
@@ -134,16 +134,10 @@ private void compressOrder0WayN (
134134
Utils.normaliseFrequenciesOrder0Shift(F, Constants.TOTAL_FREQ_SHIFT);
135135
}
136136

137-
// update the RANS Encoding Symbols
137+
// using the normalised frequencies, set the RANSEncodingSymbols
138138
buildSymsOrder0(F);
139139
inBuffer.rewind();
140140
final int Nway = ransNx16Params.getNumInterleavedRANSStates();
141-
final long[] rans = new long[Nway];
142-
143-
// initialize rans states
144-
for (int r=0; r<Nway; r++){
145-
rans[r] = Constants.RANS_Nx16_LOWER_BOUND;
146-
}
147141

148142
// number of remaining elements = inputSize % Nway = inputSize - (interleaveSize * Nway)
149143
// For Nway = 4, division by 4 is the same as right shift by 2 bits
@@ -152,14 +146,19 @@ private void compressOrder0WayN (
152146
final int interleaveSize = (Nway == 4) ? (inputSize >> 2) : (inputSize >> 5);
153147
int remainingSize = inputSize - (interleaveSize * Nway);
154148
int reverseIndex = 1;
149+
final long[] rans = new long[Nway];
155150

151+
// initialize rans states
152+
for (int r=0; r<Nway; r++){
153+
rans[r] = Constants.RANS_Nx16_LOWER_BOUND;
154+
}
156155
final ByteBuffer ptr = cp.slice();
157156
final RANSEncodingSymbol[] ransEncodingSymbols = getEncodingSymbols()[0];
158157
// encoded in LIFO order
159158
while (remainingSize>0){
160159

161160
// encode remaining elements first
162-
int remainingSymbol =0xFF & inBuffer.get(inputSize - reverseIndex);
161+
int remainingSymbol = 0xFF & inBuffer.get(inputSize - reverseIndex);
163162
rans[remainingSize - 1] = ransEncodingSymbols[remainingSymbol].putSymbolNx16(rans[remainingSize - 1], ptr);
164163
remainingSize --;
165164
reverseIndex ++;
@@ -244,8 +243,8 @@ private void compressOrder1WayN (
244243
// normalise frequencies with a constant shift
245244
Utils.normaliseFrequenciesOrder1Shift(frequencies, Constants.TOTAL_FREQ_SHIFT);
246245

247-
// set encoding symbol
248-
buildSymsOrder1(frequencies); // TODO: move into utils
246+
// using the normalised frequencies, set the RANSEncodingSymbols
247+
buildSymsOrder1(frequencies);
249248

250249
// compress for Nway = 4, then extend Nway to be variable - 4 or 32
251250
final int Nway = ransNx16Params.getNumInterleavedRANSStates();
@@ -469,39 +468,6 @@ private static void writeAlphabet(final ByteBuffer cp, final int[] F) {
469468
cp.put((byte) 0);
470469
}
471470

472-
private void buildSymsOrder0(final int[] F) {
473-
474-
// updates all the encodingSymbols
475-
final RANSEncodingSymbol[] syms = getEncodingSymbols()[0];
476-
477-
// F[j] = frequency of symbol "j"
478-
// cumulativeFreq = cumulative frequency of all the symbols preceding "j" (excluding the frequency of symbol "j")
479-
int cumulativeFreq = 0;
480-
for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) {
481-
if (F[j] != 0) {
482-
483-
//For each symbol, set start = cumulative frequency and freq = frequency
484-
syms[j].set(cumulativeFreq, F[j], Constants.TOTAL_FREQ_SHIFT);
485-
cumulativeFreq += F[j];
486-
}
487-
}
488-
}
489-
490-
private void buildSymsOrder1(final int[][] F) {
491-
// TODO: Call buildSymsOrder0 from buildSymsOrder1
492-
final RANSEncodingSymbol[][] encodingSymbols = getEncodingSymbols();
493-
for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) {
494-
final int[] F_i_ = F[i];
495-
int cumulativeFreq = 0;
496-
for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) {
497-
if (F_i_[j] != 0) {
498-
encodingSymbols[i][j].set(cumulativeFreq, F_i_[j], Constants.TOTAL_FREQ_SHIFT);
499-
cumulativeFreq += F_i_[j];
500-
}
501-
}
502-
}
503-
}
504-
505471
private ByteBuffer encodeRLE(final ByteBuffer inBuffer, final ByteBuffer outBuffer){
506472

507473
// Find the symbols that benefit from RLE, i.e, the symbols that occur more than 2 times in succession.

src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ public void testRans4x8BuffersMeetBoundaryExpectations(
186186
final ByteBuffer compressed = ransBufferMeetBoundaryExpectations(rawSize,rawData,ransEncode, ransDecode,params);
187187
Assert.assertTrue(compressed.limit() > 10);
188188
Assert.assertEquals(compressed.get(), (byte) params.getOrder().ordinal());
189-
Assert.assertEquals(compressed.getInt(), compressed.limit() - 1 - 4 - 4);
189+
Assert.assertEquals(compressed.getInt(), compressed.limit() - Constants.RANS_4x8_PREFIX_BYTE_LENGTH);
190190
Assert.assertEquals(compressed.getInt(), rawSize);
191191
}
192192

@@ -234,7 +234,7 @@ public void testRans4x8Header(
234234
// first byte of compressed data gives the order
235235
Assert.assertEquals(compressed.get(), (byte) params.getOrder().ordinal());
236236
// the next 4 bytes gives the compressed size
237-
Assert.assertEquals(compressed.getInt(), compressed.limit() - 9);
237+
Assert.assertEquals(compressed.getInt(), compressed.limit() - Constants.RANS_4x8_PREFIX_BYTE_LENGTH);
238238
// the next 4 bytes gives the uncompressed size
239239
Assert.assertEquals(compressed.getInt(), rawData.limit());
240240
}

0 commit comments

Comments
 (0)